Mercurial > hg > cc > cirrus_work
view bin/_ex1.sh @ 230:a0e2473deb33
post-processing
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 29 Feb 2024 15:01:02 +0000 |
parents | 668579197bec |
children |
line wrap: on
line source
#!/bin/bash
# shellcheck disable=SC2046
#   (SC2046: the log lines below deliberately leave $(date) unquoted so that
#    its output is word-split and the log format stays exactly as before.)
#
# This runs on the compute nodes...
# Count http vs. https vs. english, chinese in parallel, taking input
# directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
#
# Usage: _ex1.sh CC RESULT_SUBDIR FIRST LAST
#   CC             subdirectory name under $W/hst/results (input) and
#                  $W/$USER/results (output)
#   RESULT_SUBDIR  output subdirectory: $W/$USER/results/CC/RESULT_SUBDIR
#   FIRST, LAST    inclusive range of segment numbers, passed to seq(1)
#
# Environment:
#   W, USER                      used to build the input/output paths
#   SLURM_NTASKS, SLURM_PROCID   segment i is handled by the task for which
#                                i mod SLURM_NTASKS == SLURM_PROCID
#   SLURM_CPUS_PER_TASK          sizes the GNU parallel job pool (optional)
#   SLURM_JOB_NUM_NODES, SLURMD_NODENAME, SLURM_NODEID   logging only
#
# For each selected segment NNN (zero-padded to 3 digits) this reads
# $srcdir/NNN.tsv and writes $resdir/ex1_NNN.tsv.

if (($# < 4)); then
  printf 'Usage: %s CC RESULT_SUBDIR FIRST LAST\n' "${0##*/}" >&2
  exit 2
fi

# Fail early instead of building paths rooted at "/" or dividing by an
# empty task count further down.
: "${W:?W must be set in the environment}"
: "${USER:?USER must be set}"
: "${SLURM_NTASKS:?SLURM_NTASKS must be set (run under Slurm)}"
: "${SLURM_PROCID:?SLURM_PROCID must be set (run under Slurm)}"

N=$SLURM_JOB_NUM_NODES
n=$SLURM_NTASKS
c=${SLURM_CPUS_PER_TASK:-1}
nodename=$SLURMD_NODENAME
node=$SLURM_NODEID
task=$SLURM_PROCID

# CPUs budgeted per parallel job.
threadsPerTask=2
pjobs=$((c / threadsPerTask))
# Integer division yields 0 when fewer than threadsPerTask CPUs are
# available; GNU parallel treats "-j 0" as "as many jobs as possible",
# so clamp to one job instead of oversubscribing.
((pjobs >= 1)) || pjobs=1

cc=$1
resdir=$W/$USER/results/$cc/$2
srcdir=$W/hst/results/$cc/cdx_counts
s1=$3
sn=$4

echo $(date) "task $n.$task on $nodename:$N.$node start"

if ! mkdir -p "$resdir"; then
  printf '%s: cannot create %s\n' "${0##*/}" "$resdir" >&2
  exit 1
fi

# Process one segment.
#   $1 - zero-padded segment number
# Reads srcdir, resdir, task and W from the environment (the function runs
# in shells spawned by GNU parallel, hence the exports below);
# PARALLEL_SEQ is supplied by GNU parallel.
doit() {
  echo $(date) "start $1 $task $PARALLEL_SEQ"
  # NOTE(review): the pattern is reproduced as ' w ' (spaces) from the
  # source as seen; if the original used tab characters around the "w",
  # restore them - confirm against the repository copy.
  # grep -F replaces the obsolescent fgrep.
  grep -F ' w ' "$srcdir/$1.tsv" | cut -f 3,4 | uniq -c |
    "$W/shared/bin/uniq_merge.py" > "$resdir/ex1_$1.tsv"
  echo $(date) "end $1 $task $PARALLEL_SEQ"
}

export -f doit
export srcdir resdir task

# Emit the segment numbers that belong to this task and hand them to a
# pool of $pjobs concurrent doit jobs.
seq "$s1" "$sn" |
  while read -r i; do
    if ((i % n == task)); then
      printf '%03g\n' "$i"
    fi
  done |
  parallel -j "$pjobs" doit '{}'

echo $(date) "task $n.$task on $nodename:$N.$node end"