view bin/_ex1.sh @ 2:b4801f5696b2

compute node workers, see cirrus_home/bin repo for login node masters
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:22:42 +0100
parents
children 668579197bec
line wrap: on
line source

#!/bin/bash
# This runs on the compute nodes...
# count top 21 solitary languages in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv

# --- Job geometry, copied from the environment variables Slurm sets per task ---
N=$SLURM_JOB_NUM_NODES      # number of nodes in the job
n=$SLURM_NTASKS             # number of tasks in the job
c=$SLURM_CPUS_PER_TASK      # CPUs allotted to each task
nodename=$SLURMD_NODENAME   # name of the node this task runs on
local=$SLURM_LOCALID        # node-local task id
node=$SLURM_NODEID          # id of this node within the job
task=$SLURM_PROCID          # id of this task within the job

# Each doit job is budgeted this many CPUs, so a task runs c/threadsPerTask
# jobs at a time.
threadsPerTask=2
pjobs=$(( c / threadsPerTask ))
# Integer division yields 0 when c is 1 (or unset).  GNU parallel reads
# "-j 0" as "run as many jobs as possible", the opposite of the throttle
# intended here, so never go below one job.
if (( pjobs < 1 )); then
  pjobs=1
fi

# --- Positional arguments ---
#   $1  results subdirectory name, used for both input and output paths
#   $2  output subdirectory, created below $W/$USER/results/$1
#   $3  prefix for the per-file output names written by doit
#   $4  first file number to process
#   $5  last file number to process
cc=$1
resdir="$W/$USER/results/$cc/$2"
srcdir="$W/hst/results/$cc/cdx_counts"
langs=$3
s1=$4
sn=$5

echo "$(date) task $n.$task on $nodename:$N.$node start"

# Quoted, and with --, so an unusual directory name cannot be split into
# several arguments or be mistaken for an option.
mkdir -p -- "$resdir"

# doit FILE_ID
#
# Worker for one input file; it is handed to GNU parallel further down, once
# per file id.
#   $1  file id; the input read is $srcdir/$1.tsv
# Reads from the environment of the shell that runs it: srcdir, resdir,
# task, langs, W and PARALLEL_SEQ (the latter is set by GNU parallel).
# Writes its result to $resdir/${langs}_$1.tsv and prints one timestamped
# "start" and one "end" line to stdout.
doit () {
 # Progress line: timestamp, file id, task id, parallel job sequence number.
 echo $(date) start $1 $task $PARALLEL_SEQ
 # Pipeline: keep only the input lines containing the fixed string given to
 # fgrep (a lone "w" with whitespace either side, presumably tabs), pass them
 # through awk, count adjacent duplicates with uniq -c, and let uniq_merge.py
 # produce the output file.
 # NOTE(review): the awk program text looks truncated in this copy; its
 # opening single quote is never closed on this line, so the function does
 # not parse as shown.  Restore the original awk script before relying on it.
 fgrep '	w	' $srcdir/$1.tsv | awk 'BEGIN { | uniq -c | \
    $W/shared/bin/uniq_merge.py > $resdir/${langs}_$1.tsv
 echo $(date) end $1 $task $PARALLEL_SEQ 
}

# GNU parallel runs each job in a fresh shell, so the worker function and
# every variable it reads must be exported.  langs is part of the output
# file name built inside doit; without exporting it the jobs would see it
# empty and write files named "_NNN.tsv".  (W is also read by doit, but it is
# never assigned in this script, so it is expected to arrive via the
# environment already.)
export -f doit
export srcdir resdir task langs

# Round-robin work split: of the file numbers s1..sn, this task keeps those
# whose remainder modulo the task count n equals its own task id.  Each kept
# number is zero-padded to three digits and fed to parallel, which runs at
# most pjobs copies of doit at a time.
seq "$s1" "$sn" \
  | while read -r i; do
      if (( i % n == task )); then
        printf '%03g\n' "$i"
      fi
    done \
  | parallel -j "$pjobs" doit '{}'

echo "$(date) task $n.$task on $nodename:$N.$node end"