view bin/sql2csv.sh @ 166:afd7879181c9

old style
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:15:20 +0100
parents 0072e4ee6c67
children
line wrap: on
line source

#!/bin/bash
# Convert the *.db files belonging to one (node, task) slot to CSV, running
# the per-file worker in parallel and taking input directly from
#   $HOME/results/<cc>/<resdir>/<segs>.<node>.<task>.tar.gz
#
# Invoke this as e.g. sbatch -N 4 --ntasks=3 -c 5 masterJob.sh sql2csv \
#                     CC-MAIN-2019-35 cdx_db 20-43
#
# Arguments:
#   $1 (cc)      directory under $HOME/results, e.g. CC-MAIN-2019-35
#   $2 (resdir)  results subdirectory under <cc>, e.g. cdx_db
#   $3 (segs)    prefix of the tarball name, e.g. 20-43
# Environment read: SLURM_NTASKS, SLURM_CPUS_PER_TASK, SLURMD_NODENAME,
#   SLURM_LOCALID, SLURM_NODEID, HOME, PYTHONPATH
n=$SLURM_NTASKS             # captured but not read elsewhere in the visible script
c=$SLURM_CPUS_PER_TASK      # job count handed to parallel's -j
# The host name goes in its own variable: it used to be stored in 'node' and
# was then overwritten by SLURM_NODEID, leaving $nodename in the log lines
# permanently empty.
nodename=$SLURMD_NODENAME
task=$SLURM_LOCALID         # forms part of the scratch dir and tarball names
node=$SLURM_NODEID          # forms part of the tarball name

cc=$1
resdir=$2
segs=$3

# Left unquoted so the log format is unchanged: word-splitting collapses any
# runs of whitespace in date's output to single spaces.
echo $(date) $nodename:$node:$task start

# Make modules under $HOME/lib/python importable by the Python worker.
export PYTHONPATH="$PYTHONPATH:$HOME/lib/python"

# Per-task scratch directory under /dev/shm.
ld=/dev/shm/ht/$task
mkdir -p "$ld"

# Unpack only the *.db members of this (node, task) tarball into the scratch
# directory.  Stop if we cannot get there: otherwise tar would unpack into
# whatever the current directory happens to be.
cd "$ld" || { echo "$0: cannot cd to $ld" >&2; exit 1; }
tar --wildcards -xf "$HOME/results/$cc/$resdir/$segs.$node.$task.tar.gz" '*.db'
cd "$HOME"

# Collect the extracted databases with a glob rather than by parsing ls.
# nullglob makes an unmatched pattern expand to nothing, so an empty
# extraction is detected instead of passing the literal pattern downstream.
shopt -s nullglob
dbs=("$ld"/*.db)
shopt -u nullglob

if (( ${#dbs[@]} > 0 )); then
  # One database path per line on parallel's stdin; '{}' is replaced by each
  # path.  -j falls back to 1 when $c is empty (Slurm only sets
  # SLURM_CPUS_PER_TASK when -c/--cpus-per-task is given); an empty value
  # used to make parallel take 'doS2C.sh' as the job count.
  printf '%s\n' "${dbs[@]}" | \
     parallel --will-cite -j "${c:-1}" \
              doS2C.sh "$node" "$task" "$cc" "$resdir" "$segs" "$ld" '{}'
else
  echo "$0: no .db files found in $ld" >&2
fi

# Copy the generated CSV files back to the results directory, then empty the
# scratch directory.  Both steps run only after a successful cd into it:
# an unchecked cd followed by 'rm *' would delete the contents of whatever
# directory we were left in (the script cd's to $HOME earlier).
if [[ -n "$ld" ]] && cd "$ld"; then
  # '--' stops file names that begin with '-' being read as options.
  cp -- *.csv "$HOME/results/$cc/$resdir"
  # As before, cleanup happens whether or not the copy succeeded.
  rm -- *
else
  echo "$0: cannot cd to scratch directory '$ld'; skipping copy and cleanup" >&2
fi

# Left unquoted so the log format is unchanged: word-splitting collapses any
# runs of whitespace in date's output to single spaces.
echo $(date) $nodename:$node:$task end