# HG changeset patch # User Henry S. Thompson # Date 1615757282 0 # Node ID 2fc33145242bd1b5970e3af7bce4de6d28dd9ba7 # Parent 8fb11e337e23fa87ef5f76b9991b6a9410024a4f prepare for real parallel distribution diff -r 8fb11e337e23 -r 2fc33145242b bin/atest.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/atest.sh Sun Mar 14 21:28:02 2021 +0000 @@ -0,0 +1,11 @@ +#!/bin/bash +# Invoke this as e.g. sbatch -n 8 -c 5 masterJob.sh atest +n=$SLURM_NODEID +id=$SLURM_LOCALID +#printenv | fgrep SLURM +task=$SLURM_PROCID +echo $(date) $(cat atest/$task.txt) start +echo I am host $(hostname) node $n, cpu "$PMI_FD", local $id, task $task +sleep 4 +echo $(date) $(cat atest/$id.txt) end + diff -r 8fb11e337e23 -r 2fc33145242b bin/masterMaster.sh --- a/bin/masterMaster.sh Sun Mar 14 21:25:01 2021 +0000 +++ b/bin/masterMaster.sh Sun Mar 14 21:28:02 2021 +0000 @@ -1,9 +1,10 @@ #!/bin/bash # This runs on login machine to launch the real task (named by $1) on two machines # Any further args are passed to $task.sh -echo $(date) Launching master workers for "$@" +echo $(date) Launching master workers for "$@" from $(hostname) task=$1 shift -nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST")) -seq 1 -1 0 | parallel --will-cite --joblog master_$task.log -S ${nodes[1]} -S ${nodes[0]} -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@" +#nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST")) +#seq 1 -1 0 | $HOME/gentoo/usr/bin/parallel --will-cite --joblog master_$task.log -S "ssh -o StrictHostKeyChecking=accept-new ${nodes[1]}" -S "ssh -o StrictHostKeyChecking=accept-new ${nodes[0]}" -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@" +srun $HOME/bin/$task.sh "$@" echo $(date) $task "$@" workers done diff -r 8fb11e337e23 -r 2fc33145242b masterJob.sh --- a/masterJob.sh Sun Mar 14 21:25:01 2021 +0000 +++ b/masterJob.sh Sun Mar 14 21:28:02 2021 +0000 @@ -1,20 +1,17 @@ #!/bin/bash # Attempt at a master Master job 
-# Usage: sbatch [--nodes=n] [--tasks-per-node=n] masterJob.sh taskName [args...] -# Implies that ~/taskName_{0,1}.txt contain the necessary parameterisation -#SBATCH --nodes=2 -#SBATCH --exclusive -#SBATCH --tasks-per-node 36 -#SBATCH --cpus-per-task 1 +# Usage: sbatch [--exclusive --nodes= --tasks-per-node= | -n ... -c ...] masterJob.sh taskName [args...] +# Note that --exclusive is _not_ set by default +# Use -n n -c c to launch n 'parallel ...' jobs, each of which spawns c actual workers which +# divide up the job task file (see below) between them. +# Implies that ~/taskName_{0..n}.txt or ~/taskName/{0..n}.txt contain the necessary parameterisation #SBATCH --time=00:01:00 #SBATCH --partition=standard -#SBATCH --qos=short -#SBATCH --account=ec166-guest +#SBATCH --qos=standard +#SBATCH --account=dc007 #SBATCH --job-name stest cd ${SLURM_SUBMIT_DIR} -module load gnu-parallel - bin/masterMaster.sh "$@"