comparison bin/sql2csv.sh @ 152:0072e4ee6c67

use sqlite3 just to tabulate
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 28 Oct 2021 12:11:08 +0000
parents
children
comparison
equal deleted inserted replaced
151:66d17f7410f2 152:0072e4ee6c67
#!/bin/bash
# sql2csv.sh -- per-task worker: unpack this task's *.db files into a
# scratch directory under /dev/shm, run doS2C.sh over them in parallel,
# then copy the resulting *.csv files back into the results directory.
#
# Invoke this as e.g.
#   sbatch -N 4 --ntasks=3 -c 5 masterJob.sh sql2csv \
#          CC-MAIN-2019-35 cdx_db 20-43
#
# Arguments (meanings as used in the paths below):
#   $1  cc      directory under $HOME/results, e.g. CC-MAIN-2019-35
#   $2  resdir  subdirectory of $HOME/results/$cc, e.g. cdx_db
#   $3  segs    first component of the archive name, e.g. 20-43
#
# Input:  $HOME/results/<cc>/<resdir>/<segs>.<node>.<task>.tar.gz
# Output: *.csv files copied into $HOME/results/<cc>/<resdir>
#
# Environment read:
#   SLURM_CPUS_PER_TASK  number of parallel jobs (defaults to 1 if unset)
#   SLURMD_NODENAME      host name, used only in the log lines
#   SLURM_NODEID         node index, part of the archive name (required)
#   SLURM_LOCALID        task index, part of the archive name and of the
#                        scratch directory name (required)
#
# Exit status: 0 on success, non-zero if extraction, conversion or the
# final copy failed.

# Print an error on stderr and abort.
die() {
  printf 'sql2csv.sh: %s\n' "$*" >&2
  exit 1
}

# Print a timestamped progress line: <date> <host>:<node>:<task> <msg>
log() {
  printf '%s %s:%s:%s %s\n' "$(date)" "$nodename" "$node" "$task" "$1"
}

(( $# >= 3 )) || die "usage: sql2csv.sh CC RESDIR SEGS"

c=${SLURM_CPUS_PER_TASK:-1}
nodename=${SLURMD_NODENAME:-unknown}
# Both of these are part of file names; an empty value would make the
# scratch directory /dev/shm/ht/ itself, which is shared between tasks.
task=${SLURM_LOCALID:?SLURM_LOCALID is not set}
node=${SLURM_NODEID:?SLURM_NODEID is not set}

cc=$1
resdir=$2
segs=$3

results=$HOME/results/$cc/$resdir
archive=$results/$segs.$node.$task.tar.gz
ld=/dev/shm/ht/$task

log start

# Append without creating an empty leading entry when PYTHONPATH is unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$HOME/lib/python"

# Unmatched globs must expand to nothing, not to the literal pattern.
shopt -s nullglob

mkdir -p -- "$ld" || die "cannot create scratch directory $ld"

# Remove the scratch files (non-recursively, as before).  Only explicit
# paths are used, so a failed cd can never redirect this elsewhere.
cleanup() {
  rm -f -- "${ld:?}"/*
}
trap cleanup EXIT

tar --wildcards -C "$ld" -xf "$archive" '*.db' ||
  die "cannot extract *.db from $archive"

# The worker commands are run from $HOME, as in the original script.
cd "$HOME" || die "cannot cd to $HOME"

rc=0

dbs=("$ld"/*.db)
if (( ${#dbs[@]} == 0 )); then
  printf 'sql2csv.sh: no .db files found in %s\n' "$ld" >&2
  rc=1
else
  # One file name per line on stdin; parallel substitutes it for {}.
  printf '%s\n' "${dbs[@]}" |
    parallel --will-cite -j "$c" \
      doS2C.sh "$node" "$task" "$cc" "$resdir" "$segs" "$ld" '{}' || {
    printf 'sql2csv.sh: some doS2C.sh jobs failed\n' >&2
    rc=1
  }
fi

# Copy back whatever was produced, even if some jobs failed.
csvs=("$ld"/*.csv)
if (( ${#csvs[@]} == 0 )); then
  printf 'sql2csv.sh: no .csv files produced in %s\n' "$ld" >&2
  rc=1
else
  cp -- "${csvs[@]}" "$results" || {
    printf 'sql2csv.sh: failed to copy .csv files to %s\n' "$results" >&2
    rc=1
  }
fi

cleanup

log end

exit "$rc"