Mercurial > hg > cc > cirrus_work
comparison bin/_mt1.sh @ 15:a9763cd18949
in progress...
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 07 Aug 2022 13:56:00 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:4e1ecfa46bee | 15:a9763cd18949 |
---|---|
1 #!/bin/bash | |
2 # This runs on the compute nodes... | |
3 # Args: CC-MAIN-2019-35 mt1_counts mtypes 4 0 299 | |
4 # count 'mime' or 'mime-detected' values (restricted by entries in file | |
5 # named by mtypes, found in resdir) in parallel, | |
6 # taking input directly from $cc/cdx_mime/xxx.tsv | |
7 | |
# Job geometry as provided by Slurm in the task environment: | |
# N = nodes in the job, n = total tasks, c = CPUs per task, | |
# nodename = this node's name, node = this node's index within the job, | |
# task = this task's global index. | |
8 N=$SLURM_JOB_NUM_NODES | |
9 n=$SLURM_NTASKS | |
10 c=$SLURM_CPUS_PER_TASK | |
11 nodename=$SLURMD_NODENAME | |
# 'local' (node-local task id) is assigned but not read in the visible script. | |
12 local=$SLURM_LOCALID | |
13 node=$SLURM_NODEID | |
14 task=$SLURM_PROCID | |
15 | |
# pjobs is the -j value later given to parallel: CPUs per task divided by | |
# threadsPerTask, floored at 1.  Without the floor a quotient of 0 (c unset | |
# or smaller than threadsPerTask) would become 'parallel -j 0', which asks | |
# for as many simultaneous jobs as possible. | |
16 threadsPerTask=2 | |
17 pjobs=$(( c / threadsPerTask > 0 ? c / threadsPerTask : 1 )) | |
18 | |
# Positional arguments: | |
#   $1 crawl id, $2 result subdirectory name, $3 column number used for | |
#   filtering, $4 name of the filter-list file inside resdir, | |
#   $5 / $6 passed on to share_by_task.sh together with -s. | |
# NOTE(review): the Args example in the file header gives "mtypes 4" for | |
# $3 $4, i.e. the reverse of the mcol/mts order read here - confirm which | |
# order the callers use. | |
# NOTE(review): the file header says input comes from $cc/cdx_mime, but | |
# srcdir points at .../cdx_counts under the fixed user 'hst' - confirm. | |
19 cc=$1 | |
20 resdir=$W/$USER/results/$cc/$2 | |
21 srcdir=$W/hst/results/$cc/cdx_counts | |
22 mcol=$3 | |
23 mts=$4 | |
24 mtfile=$resdir/$mts | |
25 s1=$5 | |
26 sn=$6 | |
27 | |
# Progress line on stderr: task count/index, node name, node count/index, cwd. | |
28 echo $(date) task $n.$task on $nodename:$N.$node start $(pwd) 1>&2 | |
29 | |
# Quoted so a result path containing whitespace is created as one directory. | |
30 mkdir -p -- "$resdir" | |
31 | |
# doit NAME - filter one input file and write its merged result. | |
#   Reads $srcdir/NAME.tsv and keeps only lines containing the literal ' w '. | |
#   awk loads every line of $mtfile as a set and prints fields 1, 2 and 5 of | |
#   each input line whose field number $mcol is a member of that set. | |
#   The result is piped through uniq_merge.py into $resdir/${langs}_NAME.tsv. | |
#   Start and end timestamps are logged to stderr. | |
# Invoked by parallel in a child shell, so it relies on W, srcdir, resdir, | |
# task, langs, mcol and mtfile being present in the exported environment. | |
# The getline loop tests '> 0' because getline returns -1 on an unreadable | |
# file, which a bare 'while (getline < file)' would treat as true forever. | |
# '($mc in l)' tests membership without adding a new array entry per miss. | |
# NOTE(review): 'langs' is never assigned in the visible script, so the | |
# output name may start with a bare '_' - confirm the intended prefix. | |
32 doit () { | |
33 echo $(date) start $1 $task $PARALLEL_SEQ 1>&2 | |
34 grep -F ' w ' "$srcdir/$1.tsv" | \ | |
35 awk -v mc="$mcol" -v mtf="$mtfile" 'BEGIN {while ((getline < mtf) > 0) {l[$0]=1}} | |
36 {if ($mc in l) {print $1,$2,$5}}' | \ | |
37 "$W/shared/bin/uniq_merge.py" > "$resdir/${langs}_$1.tsv" | |
38 echo $(date) end $1 $task $PARALLEL_SEQ 1>&2 | |
39 } | |
40 | |
# parallel runs doit in child shells, which see only exported functions and | |
# variables.  mcol and mtfile are read inside doit, so they are exported | |
# alongside the existing names; langs and langfile are kept for | |
# compatibility although this script does not assign them. | |
41 export -f doit | |
42 export srcdir resdir task langs langfile mcol mtfile | |
43 | |
# share_by_task.sh is given the range $s1 $sn, the task count and this task's | |
# index, with a "%03g\n" format; each line it prints becomes one doit call. | |
# NOTE(review): presumably it emits only this task's share of the range - | |
# confirm against share_by_task.sh. | |
44 $W/hst/bin/share_by_task.sh -f "%03g\n" -s $s1 $sn $n $task | parallel -j $pjobs doit '{}' | |
45 | |
46 echo $(date) task $n.$task on $nodename:$N.$node end 1>&2 | |
47 |