Mercurial > hg > cc > cirrus_home
changeset 166:afd7879181c9
old style
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 18 Jul 2022 19:15:20 +0100 |
parents | e7fcae59c735 |
children | 3213a8bb2ed1 |
files | bin/cdx2sql.sh |
diffstat | 1 files changed, 32 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
# bin/cdx2sql.sh
#
# Invoke this as e.g.
#   sbatch -N 4 --ntasks=3 -c 5 masterJob.sh cdx2sql \
#                      CC-MAIN-2019-35 cdx_db
#
# Runs doC2S.sh under GNU parallel, one job per input line of
# cdx2sql/<node>.<task>.txt, then packs everything found in the per-task
# scratch directory into
#   $HOME/results/<cc>/<resdir>/<node>.<task>.tar.gz
# and empties the scratch directory.
# NOTE(review): presumably each input line names an index file that doC2S.sh
# hands to cdx2sql.py -- confirm against doC2S.sh.
#
# Arguments:
#   $1  cc      crawl identifier, e.g. CC-MAIN-2019-35
#   $2  resdir  results subdirectory, e.g. cdx_db
# Environment (read from SLURM):
#   SLURM_CPUS_PER_TASK  number of simultaneous parallel jobs
#   SLURM_LOCALID        task id, used in file and directory names
#   SLURM_NODEID         node id, used in file names
#   SLURMD_NODENAME      node name, used in log lines only
# Exit status:
#   non-zero if arguments or required SLURM variables are missing, if the
#   scratch directory cannot be created or entered, or if tar fails.

usage='usage: cdx2sql.sh CC RESDIR'
cc=${1:?$usage}
resdir=${2:?$usage}

# Fail early: an empty value here would silently shift parallel's arguments
# or produce a wrong input/archive file name.
c=${SLURM_CPUS_PER_TASK:?SLURM_CPUS_PER_TASK is not set}
task=${SLURM_LOCALID:?SLURM_LOCALID is not set}
node=${SLURM_NODEID:?SLURM_NODEID is not set}
# Only used for logging, so a missing value is tolerated.
nodename=${SLURMD_NODENAME:-}

# Print a timestamped progress line tagged with node name, node id and task id.
log() {
  echo "$(date) $nodename:$node:$task $1"
}

log start

export PYTHONPATH=$PYTHONPATH:$HOME/lib/python

# Per-task scratch directory in shared memory.
ld=/dev/shm/ht/$task
mkdir -p "$ld" || exit 1

parallel --will-cite -j "$c" \
  doC2S.sh "$node" "$task" "$cc" "$resdir" "$ld" '{}' \
  < "cdx2sql/$node.$task.txt"

# Never run the cleanup below from any directory other than the scratch one.
cd "$ld" || exit 1

# Remove the scratch files only once they are safely archived.
if tar -czf "$HOME/results/$cc/$resdir/$node.$task.tar.gz" -- *; then
  rm -- *
else
  echo "$nodename:$node:$task tar failed, leaving files in $ld" >&2
  exit 1
fi

log end