#!/bin/bash
#
# Per-task worker for a Slurm job: runs doC2S.sh (presumably a wrapper around
# cdx2sql.py -- confirm) under GNU parallel, once for every line of this
# task's work list, then packs the contents of the task's scratch directory
# into a single tarball under $HOME/results.
#
# Invoke this as e.g.
#   sbatch -N 4 --ntasks=3 -c 5 masterJob.sh cdx2sql CC-MAIN-2019-35 cdx_db
#
# Arguments:
#   $1  crawl identifier (e.g. CC-MAIN-2019-35); part of the results path
#   $2  results subdirectory (e.g. cdx_db)
#
# Environment (set by Slurm): SLURM_CPUS_PER_TASK, SLURMD_NODENAME,
#   SLURM_LOCALID, SLURM_NODEID
#
# Input:  cdx2sql/<node id>.<local task id>.txt  (one parallel job per line)
# Output: $HOME/results/$1/$2/<node id>.<local task id>.tar.gz
#
# NOTE: 'set -e' is intentionally not used: if some parallel jobs fail we
# still want to archive whatever the others produced.

if [[ -z ${1:-} || -z ${2:-} ]]; then
  printf 'usage: %s CRAWL_ID RESULTS_SUBDIR\n' "${0##*/}" >&2
  exit 2
fi
cc=$1
resdir=$2

# SLURM_CPUS_PER_TASK is only set when -c/--cpus-per-task was requested;
# fall back to one job at a time rather than handing parallel an empty -j.
cpus=${SLURM_CPUS_PER_TASK:-1}
nodename=$SLURMD_NODENAME   # host name, used for log lines only
task=$SLURM_LOCALID         # task index local to this node
node=$SLURM_NODEID          # numeric node index within the allocation

# Print a timestamped progress line tagged with host, node id and task id.
log() {
  printf '%s %s:%s:%s %s\n' "$(date)" "$nodename" "$node" "$task" "$1"
}

log start

export PYTHONPATH="$PYTHONPATH:$HOME/lib/python"

# Per-task scratch directory; it is handed to doC2S.sh and archived below.
scratch=/dev/shm/ht/$task
if ! mkdir -p -- "$scratch"; then
  printf 'error: cannot create scratch directory %s\n' "$scratch" >&2
  exit 1
fi

worklist=cdx2sql/$node.$task.txt
parallel --will-cite -j "$cpus" \
  doC2S.sh "$node" "$task" "$cc" "$resdir" "$scratch" '{}' < "$worklist" \
  || printf 'warning: parallel run over %s reported failures\n' "$worklist" >&2

archive=$HOME/results/$cc/$resdir/$node.$task.tar.gz

# Only touch files after we are certain we are inside the scratch directory,
# and only delete them once they have been archived successfully.
if cd -- "$scratch"; then
  if tar -czf "$archive" -- *; then
    rm -- *
  else
    printf 'error: could not write %s; leaving files in %s\n' \
      "$archive" "$scratch" >&2
  fi
else
  printf 'error: cannot cd to %s; nothing archived\n' "$scratch" >&2
fi

log end