comparison bin/_nl1.sh @ 2:b4801f5696b2

compute node workers, see cirrus_home/bin repo for login node masters
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:22:42 +0100
parents
children f27061e8a9da
comparison
equal deleted inserted replaced
1:d5b6748f29a9 2:b4801f5696b2
#!/bin/bash
# This runs on the compute nodes...
# Args: CC-MAIN-2019-35 nl1_counts langs 0 299
# count languages (from file named by langs, found in resdir) in parallel, taking input directly from $cc/cdx_counts/xxx.tsv

# Job geometry, taken from the environment SLURM sets up for each task.
# N, nodename and node are only used in log messages; n and task decide
# which inputs this task processes; c sizes the local worker pool.
N=$SLURM_JOB_NUM_NODES
n=${SLURM_NTASKS:-1}            # fall back to a single task outside SLURM
c=${SLURM_CPUS_PER_TASK:-1}     # unset unless --cpus-per-task was requested
nodename=$SLURMD_NODENAME
local=$SLURM_LOCALID            # not referenced elsewhere in this script
node=$SLURM_NODEID
task=${SLURM_PROCID:-0}         # fall back to task 0 outside SLURM

# Each worker pipeline is budgeted two CPUs, so run c/2 of them at once.
threadsPerTask=2
pjobs=$((c / threadsPerTask))
# Integer division yields 0 when c < threadsPerTask, and GNU parallel reads
# "-j 0" as "run as many jobs as possible" -- clamp to one job instead.
if ((pjobs < 1)); then
  pjobs=1
fi

# Positional arguments:
#   $1  crawl id, used as a path component under .../results/
#   $2  result subdirectory (below $W/$USER/results/$1)
#   $3  name of the language-list file, looked up inside the result directory
#   $4  first and
#   $5  last number of the inclusive range of input files to process
if (($# < 5)); then
  printf 'Usage: %s crawl resultSubdir langsFile first last\n' "${0##*/}" 1>&2
  exit 2
fi
# Every path below hangs off $W; an empty value would silently target "/".
: "${W:?W must be set (base directory for results and shared/bin)}"

cc=$1
resdir=$W/$USER/results/$cc/$2
srcdir=$W/hst/results/$cc/cdx_counts
langs=$3
langfile=$resdir/$langs
s1=$4
sn=$5

echo "$(date) task $n.$task on $nodename:$N.$node start $(pwd)" 1>&2

# All output is written into resdir, so there is no point continuing
# if it cannot be created.
mkdir -p -- "$resdir" || exit 1

#######################################
# Count occurrences of the wanted languages in one input file.
# Keeps the lines of $srcdir/ID.tsv that contain ' w ', prints fields 1 and 4
# of those whose 4th field is listed in $langfile, collapses adjacent
# duplicates with uniq -c, and pipes the result through
# $W/shared/bin/uniq_merge.py (not visible here) into
# $resdir/${langs}_ID.tsv.
# Globals:   srcdir, resdir, langs, langfile, task, W (read);
#            PARALLEL_SEQ (read, for logging only)
# Arguments: $1 - input file id (the NNN of NNN.tsv)
# Outputs:   start/end log lines on stderr; result file in $resdir
# Returns:   1 if $langfile is unreadable, otherwise the status of the
#            final log echo
#######################################
doit () {
  local id=$1
  echo "$(date) start $id $task $PARALLEL_SEQ" 1>&2
  if [[ ! -r $langfile ]]; then
    echo "doit: cannot read language list $langfile" 1>&2
    return 1
  fi
  # NOTE(review): the pattern is reproduced as shown (space, w, space); the
  # input is .tsv, so confirm it was not originally tab-delimited.
  grep -F ' w ' "$srcdir/$id.tsv" |
    awk -v langfile="$langfile" '
      # getline returns -1 on a read error, which is truthy: compare with
      # "> 0" so an unreadable file ends the loop instead of spinning forever.
      BEGIN { while ((getline line < langfile) > 0) wanted[line] = 1 }
      # "in" tests membership without creating an entry per distinct $4.
      ($4 in wanted) { print $1, $4 }' |
    uniq -c |
    "$W/shared/bin/uniq_merge.py" > "$resdir/${langs}_$id.tsv"
  echo "$(date) end $id $task $PARALLEL_SEQ" 1>&2
}

# GNU parallel runs each job in a fresh shell, so the worker function and
# the variables it reads have to be exported.
export -f doit
export srcdir resdir task langs langfile

# The modulo below needs a positive task count.
if ! ((n > 0)); then
  echo "task count n must be a positive integer, got '$n'" 1>&2
  exit 1
fi

# Split the range s1..sn across the n tasks round-robin: this task takes the
# numbers congruent to its own id modulo n, zero-padded to three digits to
# match the NNN.tsv input names, and feeds them to pjobs concurrent workers.
seq "$s1" "$sn" | while read -r i; do
  if ((i % n == task)); then
    printf '%03g\n' "$i"
  fi
done |
  parallel -j "$pjobs" doit '{}'
# Status of the last pipeline stage, i.e. parallel (non-zero when jobs failed).
rc=$?

echo "$(date) task $n.$task on $nodename:$N.$node end" 1>&2

# Propagate worker failures instead of always exiting 0 via the echo above.
exit "$rc"