changeset 84:2fc33145242b

prepare for real parallel distribution
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 14 Mar 2021 21:28:02 +0000
parents 8fb11e337e23
children e5d5958bf3fe
files bin/atest.sh bin/masterMaster.sh masterJob.sh
diffstat 3 files changed, 22 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/atest.sh	Sun Mar 14 21:28:02 2021 +0000
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Test task: logs start/end around a 4s sleep. Invoke this as e.g. sbatch -n 8 -c 5 masterJob.sh atest
+n=$SLURM_NODEID      # Slurm: relative ID of the node this task is running on
+id=$SLURM_LOCALID    # Slurm: node-local task ID (restarts at 0 on every node)
+#printenv | fgrep SLURM
+task=$SLURM_PROCID   # Slurm: job-wide task rank; selects this task's parameter file
+echo $(date) $(cat "atest/$task.txt") start
+echo I am host $(hostname) node $n, cpu "$PMI_FD", local $id, task $task
+sleep 4
+echo $(date) $(cat "atest/$task.txt") end   # same file as the start line; node-local $id repeats across nodes
+
--- a/bin/masterMaster.sh	Sun Mar 14 21:25:01 2021 +0000
+++ b/bin/masterMaster.sh	Sun Mar 14 21:28:02 2021 +0000
@@ -1,9 +1,10 @@
 #!/bin/bash
 # This runs on login machine to launch the real task (named by $1) on two machines
 # Any further args are passed to $task.sh
-echo $(date) Launching master workers for "$@"
+echo $(date) Launching master workers for "$@" from $(hostname)
 task=$1
 shift
-nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST"))
-seq 1 -1 0 | parallel --will-cite --joblog master_$task.log -S ${nodes[1]} -S ${nodes[0]} -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@"
+#nodes=($(scontrol show hostname "$SLURM_JOB_NODELIST"))
+#seq 1 -1 0 | $HOME/gentoo/usr/bin/parallel --will-cite --joblog master_$task.log -S "ssh -o StrictHostKeyChecking=accept-new ${nodes[1]}" -S "ssh -o StrictHostKeyChecking=accept-new  ${nodes[0]}" -N 1 --workdir /dev/shm --transferfile ${task}_'{}'.txt $* $HOME/bin/$task.sh '{}' "$@"
+srun $HOME/bin/$task.sh "$@" # srun starts the task script once per Slurm task in this job's allocation
 echo $(date) $task "$@" workers done
--- a/masterJob.sh	Sun Mar 14 21:25:01 2021 +0000
+++ b/masterJob.sh	Sun Mar 14 21:28:02 2021 +0000
@@ -1,20 +1,17 @@
 #!/bin/bash
 # Attempt at a master Master job
-# Usage: sbatch [--nodes=n] [--tasks-per-node=n] masterJob.sh taskName [args...]
-# Implies that ~/taskName_{0,1}.txt contain the necessary parameterisation
-#SBATCH --nodes=2
-#SBATCH --exclusive
-#SBATCH --tasks-per-node 36
-#SBATCH --cpus-per-task 1
+# Usage: sbatch [--exclusive --nodes= --tasks-per-node= | -n ... -c ...] masterJob.sh taskName [args...]
+#   Note that --exclusive is _not_ set by default
+#   Use -n n -c c to launch n 'parallel ...' jobs, each of which spawns c actual workers which
+#    divide up the job's task file (see below) between them.
+# Implies that ~/taskName_{0..n}.txt or ~/taskName/{0..n}.txt contain the necessary parameterisation
 #SBATCH --time=00:01:00
 #SBATCH --partition=standard
-#SBATCH --qos=short
-#SBATCH --account=ec166-guest
+#SBATCH --qos=standard
+#SBATCH --account=dc007
 #SBATCH --job-name stest
 
 cd ${SLURM_SUBMIT_DIR}
 
-module load gnu-parallel
-
 bin/masterMaster.sh "$@"