annotate bin/_nl1.sh @ 233:cc2be4008dae

for use in Stuttgart, maybe
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 09 May 2024 12:36:57 +0100
parents f27061e8a9da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/bin/bash
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 # This runs on the compute nodes...
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 # Args: CC-MAIN-2019-35 nl1_counts langs 0 299
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 # count languages (from file named by langs, found in resdir) in parallel, taking input directly from $cc/cdx_counts/xxx.tsv
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Short aliases for values taken from the Slurm environment of this task.
# N, n, nodename, node and task are used in the start/end log lines; n and task
# are also handed to share_by_task.sh; c feeds the pjobs calculation.
# local is assigned here but not referenced again in the visible script.
6 N=$SLURM_JOB_NUM_NODES
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 n=$SLURM_NTASKS
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 c=$SLURM_CPUS_PER_TASK
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 nodename=$SLURMD_NODENAME
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 local=$SLURM_LOCALID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 node=$SLURM_NODEID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 task=$SLURM_PROCID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# pjobs is the -j limit given to parallel: the task's CPU count divided by the
# number of CPUs budgeted per job (threadsPerTask).
# If c is empty (SLURM_CPUS_PER_TASK not set) or smaller than threadsPerTask the
# integer division yields 0, and `parallel -j 0` means "as many jobs as
# possible" rather than "none", so the result is clamped to at least 1.
14 threadsPerTask=2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 pjobs=$((c / threadsPerTask)); (( pjobs >= 1 )) || pjobs=1
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Positional arguments (header example: CC-MAIN-2019-35 nl1_counts langs 0 299):
#   $1  crawl id, used as a path component of both resdir and srcdir
#   $2  results subdirectory under $W/$USER/results/$cc
#   $3  name of the language-list file, looked up inside resdir
#   $4 $5  passed on to share_by_task.sh (as `-s $s1 $sn ...`) further down
# W and USER are not set in this script and must come from the environment.
# srcdir always points into the hst results tree, regardless of $USER.
17 cc=$1
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 resdir=$W/$USER/results/$cc/$2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 srcdir=$W/hst/results/$cc/cdx_counts
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 langs=$3
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 langfile=$resdir/$langs
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22 s1=$4
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 sn=$5
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Log task start on stderr (task count.rank, node name, node count.index, cwd),
# then make sure the results directory exists. -p makes the mkdir a no-op when
# the directory is already there. The path is quoted so that a value of $W or of
# an argument containing whitespace or glob characters stays a single operand.
25 echo $(date) task $n.$task on $nodename:$N.$node start $(pwd) 1>&2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 mkdir -p "$resdir"
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# doit SEG -- worker invoked by parallel, once per segment id.
#   Input : $srcdir/SEG.tsv; only lines containing the literal ' w ' are kept.
#   Filter: awk keeps rows whose 5th field appears in the language list read
#           from $langfile (one entry per line) and prints fields 1, 2 and 5.
#   Output: the filtered rows go through uniq_merge.py into
#           $resdir/${langs}_SEG.tsv; start/end markers are logged on stderr.
# Reads srcdir, resdir, langs, langfile, task and W from the environment.
# Notes on the awk program:
#   - getline returns -1 when the file cannot be read, so the loop tests "> 0";
#     a bare while(getline < f) would spin forever on a missing list. A read
#     error is reported on stderr and awk exits non-zero.
#   - the list path is passed with -v rather than pasted into the program text,
#     so whitespace or quotes in the path cannot break the awk source.
#   - "$5 in l" tests membership without creating an entry for each unseen value.
# grep -F is the supported spelling of the obsolescent fgrep.
29 doit () {
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 echo $(date) start $1 $task $PARALLEL_SEQ 1>&2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31 grep -F ' w ' "$srcdir/$1.tsv" | \
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
32 awk -v langfile="$langfile" 'BEGIN {while ((rc = (getline lang < langfile)) > 0) {l[lang]=1}; if (rc < 0) {print "cannot read " langfile > "/dev/stderr"; exit 1}}
4
f27061e8a9da convert to no longer need uniq -c
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 2
diff changeset
33 {if ($5 in l) {print $1,$2,$5}}' | \
2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34 "$W/shared/bin/uniq_merge.py" > "$resdir/${langs}_$1.tsv"
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
35 echo $(date) end $1 $task $PARALLEL_SEQ 1>&2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36 }
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
37
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Export the worker function and the variables it reads so that the shells
# started by parallel can see them. W is also read inside doit but is not in
# this list -- presumably it is already exported by the caller; confirm.
38 export -f doit
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39 export srcdir resdir task langs langfile
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
40
4
f27061e8a9da convert to no longer need uniq -c
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 2
diff changeset
# Each line printed by share_by_task.sh becomes the single argument of one doit
# call, with at most pjobs calls running at once. The helper is given the
# format "%03g\n", s1, sn, the task count and this task's rank; it presumably
# emits this task's share of the range s1..sn -- confirm against that script.
41 $W/hst/bin/share_by_task.sh -f "%03g\n" -s $s1 $sn $n $task | parallel -j $pjobs doit '{}'
2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Log task completion on stderr, mirroring the start message.
43 echo $(date) task $n.$task on $nodename:$N.$node end 1>&2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
44