Mercurial > hg > cc > cirrus_work
annotate bin/_runme.sh @ 252:39c3835716f3
run with login shell
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 17 Jan 2025 20:35:21 +0000 |
parents | 235004978b22 |
children |
rev | line source |
---|---|
2
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
#!/bin/bash
# This runs on the compute nodes...
# Args: [-a i] wd [-d] [-t nthreads] [-b CMDS] [-i input] CMDS
# See ug4/azure/notes.txt for documentation
#
# Options are recognised only in the order shown above: each one is
# tested against $1 in turn and shifted away when present.

# Snapshot of the Slurm environment for this task (the Slurm
# documentation defines the meaning of each variable).
N=$SLURM_JOB_NUM_NODES
n=$SLURM_NTASKS
c=$SLURM_CPUS_PER_TASK
nodename=$SLURMD_NODENAME
local=$SLURM_LOCALID
node=$SLURM_NODEID
task=$SLURM_PROCID

# Optional "-a i": keep the value in xarg; xarg is empty when -a is absent.
if [ "$1" = "-a" ]
then
  shift
  xarg="$1"
  shift
else
  xarg=
fi

# wd: directory everything else runs in.  Give up if it cannot be
# entered, rather than carrying on in whatever directory we started in.
if ! cd "$1"
then
  echo "$0: cannot cd to |$1|, giving up" 1>&2
  exit 1
fi
shift

# Optional "-d": switch on the extra debugging output (tested via $debug).
if [ "$1" = "-d" ]
then
  shift
  debug=1
fi

# Optional "-t nthreads": threads allowed per job, 2 unless given.
if [ "$1" = "-t" ]
then
  shift
  threadsPerTask=$1
  shift
else
  threadsPerTask=2
fi

# A zero, empty or non-numeric thread count would make the division
# below fail and leave pjobs unusable, so refuse it up front.
if ! [[ $threadsPerTask =~ ^0*[1-9][0-9]*$ ]]
then
  echo "$0: -t needs a positive integer, got |$threadsPerTask|" 1>&2
  exit 2
fi

# Number of jobs to run side by side: CPUs for this task divided by
# threads per job.  10# forces decimal so a value such as 08 is accepted.
pjobs=$((c / 10#$threadsPerTask))

# c is empty when SLURM_CPUS_PER_TASK is not set, and may be smaller than
# the thread count; either way the quotient is 0.  Always allow one job,
# otherwise the default input (seq 1 $pjobs) would be empty.
if [ "$pjobs" -lt 1 ]
then
  pjobs=1
fi

echo $(date) task $n.$task on $nodename:$N.$node start $pjobs jobs 1>&2

# Put the per-user and shared bin directories under $W first on PATH
PATH=$W/$USER/bin:$W/shared/bin:$PATH
# These are exported so the job shells started later can see them
export task PATH n xarg

# Optional "-b CMDS": CMDS is evaluated once, here, in this shell.
case "$1" in
  -b)
    shift
    eval "$1"
    shift
    ;;
esac

# default to make sure something runs
input="seq 1 $pjobs"

# Optional "-i input": a command line whose output replaces the default
# input; it is evaluated later, when the jobs are started.
case "$1" in
  -i)
    shift
    input="$1"
    shift
    if [ -n "$debug" ]
    then
      echo $(date) task $n.$task input "|$input|"
    fi
    ;;
esac

# CMDS: the command string each job evaluates; exported for the job shells.
cmd="$1"
export cmd
shift

# doit ARG
#   Run one job.  ARG is stored in the plain (non-local) variable arg, a
#   start line is echoed, $cmd is evaluated in this same shell (so it can
#   read $arg, $xarg and $task), and an end line is echoed.
#   PARALLEL_SEQ is not set anywhere in this script; GNU parallel supplies
#   it to the jobs it runs, otherwise it expands to nothing.
#   The return status is that of the final echo, not of $cmd.
#   The statements are left exactly as they were: the definition is
#   exported with "export -f" and its text is shown by the debugging output.
function doit {
  arg="$1"
  echo $(date) start $task $PARALLEL_SEQ "|$cmd|$xarg|$arg|"
  eval "$cmd"
  echo $(date) end $task $PARALLEL_SEQ
}

# Make doit available to the bash processes started by xargs / parallel.
export -f doit

if [ "$debug" ]
then
  echo $(date) task $n.$task cmd "|$cmd|" doit $(type doit)
fi

# Evaluate the input command and feed its output, one line per job, to
# doit.  With -d every input line is also copied to stderr.
eval "$input" | \
  if [ "$debug" ]
  then
    # Copy via a separate cat rather than naming /dev/stderr as a file
    tee >(cat 1>&2)
  else
    cat
  fi | \
  if [ "${pjobs:-0}" -le 1 ]
  then
    # One job at a time, each in a login shell.  The input line is handed
    # over as a positional parameter instead of being pasted into the
    # command string, so it reaches doit as a single argument and is never
    # re-parsed as shell code.  -I already means one line per command, so
    # the conflicting -n 1 is not given.
    xargs -I ^ bash -lc 'doit "$1"' bash ^
  else
    parallel --line-buffer -j $pjobs doit '{}'
  fi

echo $(date) task $n.$task on $nodename:$N.$node end 1>&2
