Mercurial > hg > cc > cirrus_work
view bin/_mt1.sh @ 93:25bd398a8035
improve reordering, still failing on cdx-00004
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 06 Sep 2023 18:51:21 +0100 |
parents | a9763cd18949 |
children |
line wrap: on
line source
#!/bin/bash
# This runs on the compute nodes...
# Args: CC-MAIN-2019-35 mt1_counts mtypes 4 0 299
#   $1  crawl directory name (e.g. CC-MAIN-2019-35)
#   $2  results subdirectory name under $W/$USER/results/$1
#   $3  mcol: number of the input field whose value is looked up in the
#       media-type table
#   $4  mts: name of the media-type list file, found in the results directory
#   $5 $6  passed to share_by_task.sh as "-s $5 $6" (presumably the first and
#       last input file numbers -- confirm against share_by_task.sh)
# NOTE(review): the example above has "mtypes" third and "4" fourth, which
#  looks swapped relative to the $3/$4 assignments below -- confirm which
#  one is intended.
# Count 'mime' or 'mime-detected' values (restricted by entries in the file
#  named by $4, found in resdir) in parallel,
#  taking input from $W/hst/results/$1/cdx_counts/NNN.tsv
# Requires in the environment: W, USER and the SLURM_* variables read below.

N=$SLURM_JOB_NUM_NODES
n=$SLURM_NTASKS
c=$SLURM_CPUS_PER_TASK
nodename=$SLURMD_NODENAME
local=$SLURM_LOCALID   # not used below; kept as in the original
node=$SLURM_NODEID
task=$SLURM_PROCID

# Each parallel job is budgeted two CPUs of this task's allocation.
threadsPerTask=2
pjobs=$((c / threadsPerTask))

cc=$1
resdir=$W/$USER/results/$cc/$2
srcdir=$W/hst/results/$cc/cdx_counts
mcol=$3
mts=$4
mtfile=$resdir/$mts
s1=$5
sn=$6

echo $(date) task $n.$task on $nodename:$N.$node start $(pwd) 1>&2

mkdir -p "$resdir"

# doit NNN
#   Filter $srcdir/NNN.tsv: keep lines containing ' w ' whose field number
#   $mcol appears (as a whole line) in $mtfile, print fields 1, 2 and 5 of
#   those lines, pipe them through uniq_merge.py and write the result to
#   $resdir/${langs}_NNN.tsv.
#   Globals read: srcdir resdir mcol mtfile langs task W PARALLEL_SEQ
#   Returns: 1 if $mtfile is unreadable, otherwise the pipeline's status.
doit () {
  echo $(date) start $1 $task $PARALLEL_SEQ 1>&2
  if [ ! -r "$mtfile" ]; then
    echo "doit: cannot read media-type list '$mtfile'" 1>&2
    return 1
  fi
  # NOTE(review): the pattern is space-w-space exactly as found; if the .tsv
  #  input is tab-separated the original may have used tabs -- confirm.
  # NOTE(review): langs is never set in this script, so the output name
  #  normally starts with '_'; kept so existing output paths do not change.
  grep -F -- ' w ' "$srcdir/$1.tsv" | \
    awk -v mc="$mcol" -v mtf="$mtfile" '
      BEGIN {
        # "> 0" matters: getline returns -1 on a read error, which would
        # otherwise keep the loop spinning forever.
        while ((getline line < mtf) > 0) { l[line] = 1 }
        close(mtf)
      }
      { if (($mc) in l) { print $1, $2, $5 } }' | \
    "$W/shared/bin/uniq_merge.py" > "$resdir/${langs}_$1.tsv"
  echo $(date) end $1 $task $PARALLEL_SEQ 1>&2
}

# parallel runs doit in fresh shells, so everything doit reads must be
# exported; mcol and mtfile were missing from this list.
export -f doit
export srcdir resdir task langs langfile mcol mtfile

"$W/hst/bin/share_by_task.sh" -f "%03g\n" -s "$s1" "$sn" "$n" "$task" | \
  parallel -j "$pjobs" doit '{}'

echo $(date) task $n.$task on $nodename:$N.$node end 1>&2