Mercurial > hg > cc > cirrus_home
changeset 84:2fc33145242b
prepare for real parallel distribution
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 14 Mar 2021 21:28:02 +0000 |
parents | 8fb11e337e23 |
children | e5d5958bf3fe |
files | bin/atest.sh bin/masterMaster.sh masterJob.sh |
diffstat | 3 files changed, 22 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/atest.sh Sun Mar 14 21:28:02 2021 +0000
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Invoke this as e.g. sbatch -n 8 -c 5 masterJob.sh atest
+n=$SLURM_NODEID
+id=$SLURM_LOCALID
+#printenv | fgrep SLURM
+task=$SLURM_PROCID
+echo $(date) $(cat atest/$task.txt) start
+echo I am host $(hostname) node $n, cpu "$PMI_FD", local $id, task $task
+sleep 4
+echo $(date) $(cat atest/$id.txt) end
+
--- a/bin/masterMaster.sh Sun Mar 14 21:25:01 2021 +0000
+++ b/bin/masterMaster.sh Sun Mar 14 21:28:02 2021 +0000
@@ -1,9 +1,10 @@
 #!/bin/bash
 # This runs on login machine to launch the real task (named by $1) on two machines
 # Any further args are passed to $task.sh
-echo $(date) Launching master workers for "$@"
+echo $(date) Launching master workers for "$@" from $(hostname)
 task=$1
 shift
-nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST"))
-seq 1 -1 0 | parallel --will-cite --joblog master_$task.log -S ${nodes[1]} -S ${nodes[0]} -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@"
+#nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST"))
+#seq 1 -1 0 | $HOME/gentoo/usr/bin/parallel --will-cite --joblog master_$task.log -S "ssh -o StrictHostKeyChecking=accept-new ${nodes[1]}" -S "ssh -o StrictHostKeyChecking=accept-new ${nodes[0]}" -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@"
+srun $HOME/bin/$task.sh "$@"
 echo $(date) $task "$@" workers done
--- a/masterJob.sh Sun Mar 14 21:25:01 2021 +0000
+++ b/masterJob.sh Sun Mar 14 21:28:02 2021 +0000
@@ -1,20 +1,17 @@
 #!/bin/bash
 # Attempt at a master Master job
-# Usage: sbatch [--nodes=n] [--tasks-per-node=n] masterJob.sh taskName [args...]
-# Implies that ~/taskName_{0,1}.txt contain the necessary parameterisation
-#SBATCH --nodes=2
-#SBATCH --exclusive
-#SBATCH --tasks-per-node 36
-#SBATCH --cpus-per-task 1
+# Usage: sbatch [--exclusive --nodes= --tasks-per-node= | -n ... -c ...] masterJob.sh taskName [args...]
+# Note that --exclusive is _not_ set by default
+# Use -n n -c c for to launch n 'parallel ...' jobs each of which spawns c actual workers which
+# divide up the job task file task file (see below) between them.
+# Implies that ~/taskName_{0..n}.txt or ~/taskName/{0..n}.txt contain the necessary parameterisation
 #SBATCH --time=00:01:00
 #SBATCH --partition=standard
-#SBATCH --qos=short
-#SBATCH --account=ec166-guest
+#SBATCH --qos=standard
+#SBATCH --account=dc007
 #SBATCH --job-name stest
 cd ${SLURM_SUBMIT_DIR}
-module load gnu-parallel
-
 bin/masterMaster.sh "$@"