view bin/cdx2sql.sh @ 182:dfb88dee52b8

tab completion fix
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 08 Sep 2023 21:42:12 +0100
parents afd7879181c9
children
line wrap: on
line source

#!/bin/bash
# Per-task driver: feed this task's list of work items to doC2S.sh via
# GNU parallel, then archive whatever was left in the task's scratch
# directory.
#
# Invoke this as e.g.
#   sbatch -N 4 --ntasks=3 -c 5 masterJob.sh cdx2sql CC-MAIN-2019-35 cdx_db
#
# Arguments:
#   $1 - crawl name (e.g. CC-MAIN-2019-35); a component of the results path
#   $2 - results subdirectory name (e.g. cdx_db)
#
# Input:  cdx2sql/<node id>.<local task id>.txt, one work item per line
#         (presumably index files for cdx2sql.py -- confirm in doC2S.sh)
# Output: $HOME/results/<$1>/<$2>/<node id>.<local task id>.tar.gz
#
# Environment (set by Slurm): SLURM_NTASKS, SLURM_CPUS_PER_TASK,
# SLURMD_NODENAME, SLURM_LOCALID, SLURM_NODEID.
n=$SLURM_NTASKS            # not used below; kept in case a caller relies on it
c=$SLURM_CPUS_PER_TASK     # number of concurrent parallel jobs
nodename=$SLURMD_NODENAME  # host name, only used in the log lines
task=$SLURM_LOCALID
node=$SLURM_NODEID

cc=$1
resdir=$2

# $(date) is deliberately left unquoted so that runs of whitespace in its
# output are collapsed, keeping the log line format unchanged.
echo $(date) "$nodename:$node:$task" start

export PYTHONPATH=$PYTHONPATH:$HOME/lib/python

# Per-task scratch directory; it is passed to doC2S.sh and archived below.
ld=/dev/shm/ht/$task
mkdir -p "$ld"

# One doC2S.sh invocation per input line ('{}' is replaced by the line),
# at most $c running at once.
parallel --will-cite -j "$c" doC2S.sh "$node" "$task" "$cc" "$resdir" "$ld" '{}' < "cdx2sql/$node.$task.txt"

# Only archive and clean up once we are really inside the scratch
# directory: otherwise tar and rm would act on the current directory.
if cd "$ld"
then
 tar -czf "$HOME/results/$cc/$resdir/$node.$task.tar.gz" * ||
  echo "$nodename:$node:$task tar of $ld failed" >&2
 # The scratch files are removed even if tar failed, as before.
 rm -- *
else
 echo "$nodename:$node:$task cannot cd to $ld, nothing archived or removed" >&2
fi

echo $(date) "$nodename:$node:$task" end