changeset 166:afd7879181c9

old style
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:15:20 +0100
parents e7fcae59c735
children 3213a8bb2ed1
files bin/cdx2sql.sh
diffstat 1 files changed, 32 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/cdx2sql.sh	Mon Jul 18 19:15:20 2022 +0100
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Invoke this as e.g. sbatch -N 4 --ntasks=3 -c 5 masterJob.sh cdx2sql \
+#                     CC-MAIN-2019-35 cdx_db
+# run cdx2sql.py in parallel, working index files enumerated in cdx2sql/n.c.txt
+n=$SLURM_NTASKS
+c=$SLURM_CPUS_PER_TASK
+node=$SLURMD_NODENAME
+task=$SLURM_LOCALID
+node=$SLURM_NODEID
+
+cc=$1
+resdir=$2
+
+echo $(date) $nodename:$node:$task start
+
+export PYTHONPATH=$PYTHONPATH:$HOME/lib/python
+
+ld=/dev/shm/ht/$task
+mkdir -p $ld
+
+parallel --will-cite -j $c doC2S.sh $node $task $cc $resdir $ld '{}' < cdx2sql/$node.$task.txt
+
+if [ "$ld" ]
+then
+ cd $ld
+ tar -czf $HOME/results/$cc/$resdir/$node.$task.tar.gz *
+ rm *
+fi
+
+echo $(date) $nodename:$node:$task end
+
+