Mercurial > hg > cc > cirrus_work
comparison bin/_nl1.sh @ 2:b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 18 Jul 2022 19:22:42 +0100 |
parents | |
children | f27061e8a9da |
comparison
equal
deleted
inserted
replaced
1:d5b6748f29a9 | 2:b4801f5696b2 |
---|---|
#!/bin/bash
# This runs on the compute nodes...
# Args: CC-MAIN-2019-35 nl1_counts langs 0 299
#   $1 -> cc      name used to locate both the source and the results trees
#   $2            results subdirectory under $W/$USER/results/$cc
#   $3 -> langs   name of the languages file, looked up inside resdir
#   $4 -> s1      first shard number
#   $5 -> sn      last shard number
# Counts languages (from the file named by langs, found in resdir) in
# parallel, taking input directly from $cc/cdx_counts/xxx.tsv
# NOTE(review): $W and the five arguments are used unchecked, exactly as
# before; a missing value silently yields paths rooted at "/".

# Job geometry, taken from the environment Slurm gives each task.
N=$SLURM_JOB_NUM_NODES
n=$SLURM_NTASKS
c=$SLURM_CPUS_PER_TASK
nodename=$SLURMD_NODENAME
local=$SLURM_LOCALID
node=$SLURM_NODEID
task=$SLURM_PROCID

# Each parallel job is budgeted threadsPerTask CPUs.
threadsPerTask=2
pjobs=$(( c / threadsPerTask ))
# An unset or too-small CPU count makes the division yield 0, and GNU
# parallel treats "-j 0" as "run as many jobs as possible"; never go
# below one job.
if (( pjobs < 1 )); then
  pjobs=1
fi

cc=$1
resdir=$W/$USER/results/$cc/$2
srcdir=$W/hst/results/$cc/cdx_counts
langs=$3
langfile=$resdir/$langs
s1=$4
sn=$5

echo "$(date) task $n.$task on $nodename:$N.$node start $(pwd)" 1>&2

mkdir -p -- "$resdir"
28 | |
# doit SHARD
# Process one shard: read $srcdir/SHARD.tsv, keep the lines containing the
# fixed string ' w ', and for every line whose 4th whitespace-separated
# field is listed in $langfile emit "field1 field4". Consecutive duplicates
# are counted with uniq -c and the result is piped through
# $W/shared/bin/uniq_merge.py into $resdir/${langs}_SHARD.tsv.
# Globals read: srcdir, resdir, langs, langfile, task, W, PARALLEL_SEQ.
# Returns: 1 (without writing anything) if $langfile is unreadable,
#          otherwise the status of the final log line.
# NOTE(review): the ' w ' pattern is reproduced exactly as it appears in
# this listing; confirm it is not meant to contain tab characters.
doit () {
  local shard=$1

  # Without this guard awk's getline would return -1 forever and the job
  # would spin instead of failing.
  if [[ ! -r "$langfile" ]]; then
    echo "$(date) cannot read language file $langfile ($shard $task)" 1>&2
    return 1
  fi

  echo "$(date) start $shard $task $PARALLEL_SEQ" 1>&2
  grep -F -- ' w ' "$srcdir/$shard.tsv" | \
    awk -v langfile="$langfile" '
      BEGIN {
        # "> 0" stops on both end-of-file (0) and read error (-1).
        while ((getline lang < langfile) > 0) { wanted[lang] = 1 }
        close(langfile)
      }
      ($4 in wanted) { print $1, $4 }' | uniq -c | \
    "$W/shared/bin/uniq_merge.py" > "$resdir/${langs}_$shard.tsv"
  echo "$(date) end $shard $task $PARALLEL_SEQ" 1>&2
}
37 | |
# Make doit and the variables it reads visible to the shells that
# parallel starts.
export -f doit
export srcdir resdir task langs langfile

# shards_for_task FIRST LAST NTASKS TASK
# Print, one per line and zero-padded to at least three digits, every
# integer i in FIRST..LAST with i % NTASKS == TASK.
# Returns 2 (printing nothing on stdout) if an argument is not a
# non-negative integer or NTASKS is 0.
shards_for_task () {
  local arg
  for arg in "$1" "$2" "$3" "$4"; do
    if [[ ! $arg =~ ^[0-9]+$ ]]; then
      echo "shards_for_task: expected 4 non-negative integers, got '$1' '$2' '$3' '$4'" 1>&2
      return 2
    fi
  done
  # 10# forces base 10 so zero-padded arguments are not read as octal.
  local first=$(( 10#$1 )) last=$(( 10#$2 )) ntasks=$(( 10#$3 )) me=$(( 10#$4 ))
  if (( ntasks == 0 )); then
    echo "shards_for_task: NTASKS must be at least 1" 1>&2
    return 2
  fi
  local i
  for (( i = first; i <= last; i++ )); do
    if (( i % ntasks == me )); then
      # %d, not %g: %g would switch to exponent notation for large ids.
      printf '%03d\n' "$i"
    fi
  done
}

# Shards are dealt round-robin across the job's tasks; this task's share
# is fed to parallel, which runs doit on each id, $pjobs at a time.
shards_for_task "$s1" "$sn" "$n" "$task" | \
  parallel -j "$pjobs" doit '{}'

echo "$(date) task $n.$task on $nodename:$N.$node end" 1>&2
49 |