Mercurial > hg > cc > cirrus_home
view bin/sql2tsv.sh @ 154:2643a6825f17
instead of csv
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 17 Nov 2021 18:26:33 +0000 |
parents | |
children |
line wrap: on
line source
#!/bin/bash
# Invoke this as e.g. sbatch -N 4 --ntasks=3 -c 5 masterJob.sh sql2tsv \
#                       CC-MAIN-2019-35 cdx_db 0-299
# run sql2tsv.py in parallel, taking input directly from .../0-299.n.c.tar.gz
#
# Arguments:
#   $1  cc      crawl name, e.g. CC-MAIN-2019-35
#   $2  resdir  results subdirectory under $HOME/results/$cc, e.g. cdx_db
#   $3  segs    segment range used as the tarball name prefix, e.g. 0-299
#
# Environment (read): SLURM_NTASKS, SLURM_CPUS_PER_TASK, SLURMD_NODENAME,
#   SLURM_LOCALID, SLURM_NODEID, HOME, PYTHONPATH
#
# Steps:
#   1. Unpack the *.db members of
#      $HOME/results/$cc/$resdir/$segs.$node.$task.tar.gz into /dev/shm/ht/$task
#   2. Run doS2T.sh once per .db file, $c at a time, via GNU parallel
#      (doS2T.sh is presumably the wrapper around sql2tsv.py -- it is not
#      defined in this file)
#   3. Copy the resulting *.tsv files to $HOME/results/$cc/$resdir and empty
#      the scratch directory
#
# Exit status: 1 if the scratch directory cannot be created/entered or the
#   .tsv files cannot be copied back; 0 otherwise.

n=$SLURM_NTASKS             # not used below; retained from the original script
c=$SLURM_CPUS_PER_TASK      # number of concurrent doS2T.sh jobs
nodename=$SLURMD_NODENAME   # host name, used only in the log lines
task=$SLURM_LOCALID
node=$SLURM_NODEID

cc=$1
resdir=$2
segs=$3

status=0

echo "$(date) $nodename:$node:$task start"

export PYTHONPATH="$PYTHONPATH:$HOME/lib/python"

# Per-task scratch directory on the node's shared-memory filesystem.
ld=/dev/shm/ht/$task

# Refuse to continue without the scratch directory: everything below
# (extraction and, above all, the final rm) assumes it exists.
mkdir -p "$ld" || exit 1
cd "$ld" || exit 1

tar --wildcards -xf "$HOME/results/$cc/$resdir/$segs.$node.$task.tar.gz" '*.db' \
  || echo "$0: warning: tar extraction reported an error" >&2

cd "$HOME" || echo "$0: warning: cannot cd to $HOME" >&2

# Collect the extracted databases with a glob rather than parsing ls output.
shopt -s nullglob
dbs=("$ld"/*.db)
shopt -u nullglob

if (( ${#dbs[@]} > 0 )); then
  printf '%s\n' "${dbs[@]}" \
    | parallel --will-cite -j "$c" \
        doS2T.sh "$node" "$task" "$cc" "$resdir" "$segs" "$ld" '{}'
else
  echo "$0: warning: no .db files found in $ld" >&2
fi

# Copy results back, then empty the scratch directory.  Absolute paths are
# used so that rm can never act on whatever the current directory happens
# to be; ${ld:?} aborts if ld were ever empty.
if ! cp -- "$ld"/*.tsv "$HOME/results/$cc/$resdir"; then
  echo "$0: error: failed to copy .tsv results to $HOME/results/$cc/$resdir" >&2
  status=1
fi
rm -- "${ld:?}"/*

echo "$(date) $nodename:$node:$task end"
exit "$status"