#!/bin/bash
# This runs on the compute nodes...
# Args: [-a i] wd [-d] [-t nthreads] [-b CMDS] [-i input] CMDS
#   -a i         value handed on to the jobs as $xarg (default: empty)
#   wd           directory to cd to before anything else happens
#   -d           turn on debugging output
#   -t nthreads  threads per job (default 2); the number of concurrent
#                jobs is SLURM_CPUS_PER_TASK / nthreads
#   -b CMDS      shell code eval'ed once, here, before any job is started
#   -i input     shell code whose output lines become the per-job
#                arguments (default: seq 1 <number of concurrent jobs>)
#   CMDS         shell code exported as $cmd
# The options are positional: each is only recognised in the slot shown.
# See ug4/azure/notes.txt for documentation
#
# Taken from bin/_runme.sh @ 154:5d30cd8c6254 ("fix typo",
#  Henry S. Thompson <ht@inf.ed.ac.uk>, Wed, 11 Oct 2023 12:50:29 +0100)

# Short names for the Slurm-provided description of this task
N=$SLURM_JOB_NUM_NODES
n=$SLURM_NTASKS
c=$SLURM_CPUS_PER_TASK
nodename=$SLURMD_NODENAME
local=$SLURM_LOCALID
node=$SLURM_NODEID
task=$SLURM_PROCID

if [ "$1" = "-a" ]
then
  shift
  xarg="$1"
  shift
else
  xarg=
fi

# Stop rather than run every job in the wrong place when wd is unusable.
# An empty wd is passed over, leaving the current directory unchanged.
if [ -n "$1" ] && ! cd "$1"
then
  echo "$0: cannot cd to $1" 1>&2
  exit 1
fi
shift

# NOTE: debug is not reset here, so without -d it keeps whatever value
#  (if any) it has in the environment
if [ "$1" = "-d" ]
then
  shift
  debug=1
fi

if [ "$1" = "-t" ]
then
  shift
  threadsPerTask=$1
  shift
  # Zero or a non-number would make the division below fail and leave
  #  pjobs unset, so reject it here
  if ! [[ $threadsPerTask =~ ^[1-9][0-9]*$ ]]
  then
    echo "$0: -t needs a positive integer, not '$threadsPerTask'" 1>&2
    exit 2
  fi
else
  threadsPerTask=2
fi

# An unset or empty c counts as 0 here, giving pjobs=0
pjobs=$((c / threadsPerTask))

echo $(date) task $n.$task on $nodename:$N.$node start $pjobs jobs 1>&2

# W is not set in this script: it has to come from the environment
PATH=$W/$USER/bin:$W/shared/bin:$PATH
export task PATH n xarg

if [ "$1" = "-b" ]
then
  shift
  eval "$1"
  shift
fi

input="seq 1 $pjobs" # default to make sure something runs

if [ "$1" = "-i" ]
then
  shift
  input="$1"
  shift
  if [ "$debug" ]
  then
    echo $(date) task $n.$task input "|$input|"
  fi
fi

export cmd="$1"
shift

# Run the command string in $cmd once, for one item of input.
#  $1 - the input item; the eval'ed $cmd can refer to it as $arg
# Reads cmd, xarg and task, which are not set here and have to be
#  exported to whatever shell runs this function, and PARALLEL_SEQ.
# Writes a start line and an end line to stdout around whatever the
#  command itself outputs.
# The return status is that of the final echo, not of the command.
doit () {
  # local, so nothing leaks to the calling shell; bash scoping is dynamic,
  #  so the eval below still sees it
  local arg="$1"
  echo $(date) start $task $PARALLEL_SEQ "|$cmd|$xarg|$arg|"
  eval "$cmd"
  echo $(date) end $task $PARALLEL_SEQ
}

# The jobs run in fresh bash processes started by xargs or parallel, so
#  the function has to travel in the environment
export -f doit

if [ "$debug" ]
then
  echo $(date) task $n.$task cmd "|$cmd|" doit $(type doit)
fi

# Three stages: produce the input lines, optionally copy them to stderr
#  when debugging, then run doit once per line
eval "$input" |
  if [ "$debug" ]
  then
    tee >(cat 1>&2)
  else
    cat
  fi |
  if [ "$pjobs" -le 1 ]
  then
    # One job at a time.  The input line is handed to bash as a positional
    #  parameter, not spliced into the command text, so doit receives the
    #  whole line as $1 (as it does in the parallel branch) and nothing in
    #  the line gets executed as shell code.
    # -I already means one invocation per line, so no -n 1 is needed
    xargs -I ^ bash -c 'doit "$1"' doit ^
  else
    parallel --line-buffer -j "$pjobs" doit '{}'
  fi

echo $(date) task $n.$task on $nodename:$N.$node end 1>&2