Mercurial > hg > cc > cirrus_home
diff bin/sql2tsv.sh @ 154:2643a6825f17
instead of csv
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 17 Nov 2021 18:26:33 +0000 |
parents | |
children |
line wrap: on
line diff
#!/bin/bash
# sql2tsv.sh (added as a new file, bin/sql2tsv.sh)
#
# Invoke this as e.g.
#   sbatch -N 4 --ntasks=3 -c 5 masterJob.sh sql2tsv \
#     CC-MAIN-2019-35 cdx_db 0-299
#
# Run sql2tsv.py in parallel (through doS2T.sh, one job per extracted .db
# file), taking input directly from
#   $HOME/results/<cc>/<resdir>/<segs>.<node>.<task>.tar.gz
# and copying the resulting *.tsv files back to $HOME/results/<cc>/<resdir>.
#
# Arguments:
#   $1  cc      directory under $HOME/results (e.g. CC-MAIN-2019-35)
#   $2  resdir  directory under $HOME/results/<cc> (e.g. cdx_db)
#   $3  segs    leading part of the tarball name (e.g. 0-299)
#
# Environment read:
#   SLURM_NODEID, SLURM_LOCALID   required; they select the tarball and the
#                                 scratch directory
#   SLURM_CPUS_PER_TASK           number of parallel jobs (defaults to 1)
#   SLURMD_NODENAME               only used in the start/end log lines

# Print a warning on stderr without stopping the run.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print an error on stderr and abort.
die() {
  warn "$@"
  exit 1
}

# Emit a timestamped progress line tagged with node name, node id and task id.
log() {
  printf '%s %s:%s:%s %s\n' "$(date)" "$nodename" "$node" "$task" "$1"
}

if (( $# < 3 )); then
  printf 'Usage: %s cc resdir segs\n' "${0##*/}" >&2
  exit 2
fi

cc=$1
resdir=$2
segs=$3

# The node *name* and the node *id* are distinct values: the name is only
# logged, the id is part of the tarball name.
nodename=${SLURMD_NODENAME:-}
node=${SLURM_NODEID:?SLURM_NODEID is not set}
# An empty task id would make the scratch directory /dev/shm/ht/ itself,
# which every task on the node would then share and wipe.
task=${SLURM_LOCALID:?SLURM_LOCALID is not set}
# An empty value here would make "-j" swallow the command name below.
c=${SLURM_CPUS_PER_TASK:-1}

resultdir=$HOME/results/$cc/$resdir

log start

export PYTHONPATH="$PYTHONPATH:$HOME/lib/python"

# Per-task scratch directory under /dev/shm.
ld=/dev/shm/ht/$task
mkdir -p -- "$ld" || die "cannot create scratch directory $ld"

# Extract only the .db members, straight into the scratch directory.
tar --wildcards -C "$ld" -xf "$resultdir/$segs.$node.$task.tar.gz" '*.db' \
  || warn "extraction from $resultdir/$segs.$node.$task.tar.gz failed"

# The jobs are launched with $HOME as the working directory.
cd "$HOME" || warn "cannot cd to $HOME"

# Unmatched globs expand to nothing, so empty cases can be detected below.
shopt -s nullglob

dbs=("$ld"/*.db)
if (( ${#dbs[@]} > 0 )); then
  printf '%s\n' "${dbs[@]}" \
    | parallel --will-cite -j "$c" \
        doS2T.sh "$node" "$task" "$cc" "$resdir" "$segs" "$ld" '{}'
else
  warn "no .db files found in $ld"
fi

# Only copy and delete once we are really inside the scratch directory;
# otherwise the wildcard removal would hit whatever directory we are in.
if cd "$ld"; then
  tsvs=(*.tsv)
  if (( ${#tsvs[@]} > 0 )); then
    cp -- "${tsvs[@]}" "$resultdir" \
      || warn "copying .tsv files to $resultdir failed"
  else
    warn "no .tsv files produced in $ld"
  fi
  # Always clear the scratch directory, even if the copy failed.
  rm -f -- *
else
  warn "cannot cd to $ld; scratch files not collected or removed"
fi

log end