#!/bin/bash
# Invoke this as e.g. sbatch -N 4 --ntasks=3 -c 5 masterJob.sh sql2tsv \
#                       CC-MAIN-2019-35 cdx_db 0-299
# Run sql2tsv.py in parallel, taking input directly from .../0-299.n.c.tar.gz
#
# Per-task usage: <this script> CC RESDIR SEGS
#   $1  cc      first directory level under $HOME/results (e.g. CC-MAIN-2019-35)
#   $2  resdir  second directory level under $HOME/results (e.g. cdx_db)
#   $3  segs    archive name prefix (e.g. 0-299)
#
# Environment (set by Slurm for each task):
#   SLURM_NODEID         required; the "n" in the archive name
#   SLURM_LOCALID        required; the "c" in the archive name, also names the
#                        scratch directory
#   SLURM_CPUS_PER_TASK  optional; number of concurrent jobs (defaults to 1)
#   SLURMD_NODENAME      optional; only used in the start/end log lines
#
# What it does:
#   1. Extracts every *.db member of
#      $HOME/results/$cc/$resdir/$segs.$node.$task.tar.gz into /dev/shm/ht/$task
#   2. Runs doS2T.sh once per extracted .db file under GNU parallel
#      (doS2T.sh is not part of this file; presumably it is the wrapper that
#      runs sql2tsv.py -- confirm)
#   3. Copies the resulting *.tsv files to $HOME/results/$cc/$resdir and
#      empties the scratch directory
#
# Exit status: 0 on success, 2 on bad usage, non-zero if extraction, any
# parallel job, or the final copy failed.

# Print a message to stderr and abort; the EXIT trap (once installed) still
# empties the scratch directory.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

if (( $# < 3 )); then
  printf 'usage: %s cc resdir segs\n' "${0##*/}" >&2
  exit 2
fi

cc=$1
resdir=$2
segs=$3

# Host name is for logging only. It used to be assigned to $node and then
# overwritten by the node id, leaving $nodename empty in the log lines.
nodename=${SLURMD_NODENAME:-}
node=${SLURM_NODEID:?SLURM_NODEID is not set (run this under Slurm)}
task=${SLURM_LOCALID:?SLURM_LOCALID is not set (run this under Slurm)}
# Without -c Slurm does not set SLURM_CPUS_PER_TASK; an empty value would turn
# the command name into the argument of "parallel -j".
c=${SLURM_CPUS_PER_TASK:-1}

# $(date) is left unquoted on purpose so the timestamp is space-normalised
# exactly as in earlier logs.
# shellcheck disable=SC2046
echo $(date) "$nodename:$node:$task" start

# Kept byte-compatible with the previous value, including the leading ':'
# when PYTHONPATH was empty.
export PYTHONPATH="${PYTHONPATH:-}:$HOME/lib/python"

results=$HOME/results/$cc/$resdir
archive=$results/$segs.$node.$task.tar.gz
ld=/dev/shm/ht/$task

# Unmatched globs must expand to nothing so empty directories are detected
# instead of passing a literal '*.db' / '*.tsv' along.
shopt -s nullglob

# Remove the files in the scratch directory (the directory itself is kept, as
# before). Uses an absolute, non-empty path so it can never act on the
# current directory.
cleanup() {
  rm -f -- "${ld:?}"/*
}

mkdir -p -- "$ld" || die "cannot create scratch directory $ld"
trap cleanup EXIT

[[ -r "$archive" ]] || die "cannot read input archive $archive"
tar --wildcards -xf "$archive" -C "$ld" '*.db' \
  || die "failed to extract *.db from $archive"

# The jobs are started from $HOME, as before.
cd -- "$HOME" || die "cannot cd to $HOME"

dbs=("$ld"/*.db)
(( ${#dbs[@]} > 0 )) || die "no .db files extracted from $archive into $ld"

status=0
printf '%s\n' "${dbs[@]}" \
  | parallel --will-cite -j "$c" \
      doS2T.sh "$node" "$task" "$cc" "$resdir" "$segs" "$ld" '{}' \
  || status=$?
if (( status != 0 )); then
  printf '%s: parallel exited with status %d\n' "${0##*/}" "$status" >&2
fi

# Still copy whatever was produced, so the successful jobs' output is kept
# even if some jobs failed.
tsvs=("$ld"/*.tsv)
if (( ${#tsvs[@]} > 0 )); then
  if ! cp -- "${tsvs[@]}" "$results"/; then
    printf '%s: failed to copy .tsv files to %s\n' "${0##*/}" "$results" >&2
    (( status != 0 )) || status=1
  fi
else
  printf '%s: no .tsv files were produced in %s\n' "${0##*/}" "$ld" >&2
  (( status != 0 )) || status=1
fi

# Empty the scratch directory before logging "end", matching the old order;
# the EXIT trap repeats this harmlessly.
cleanup

# shellcheck disable=SC2046
echo $(date) "$nodename:$node:$task" end

exit "$status"