diff bin/cdx_tab.sh @ 163:ef961d91eea5

previous approach to lang/field extraction
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 18:16:27 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/cdx_tab.sh	Mon Jul 18 18:16:27 2022 +0100
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Invoke this as e.g. sbatch -n 30 -c 10 masterJob.sh cdx_segment CC-MAIN-2019-35
+# run cdx_segment.py in parallel 
+n=$SLURM_NTASKS
+c=$SLURM_CPUS_PER_TASK
+node=$SLURMD_NODENAME
+local=$SLURM_LOCALID
+proc=$SLURM_PROCID
+echo $(date) $node:$proc start
+
+type parallel
+#module load gnu-parallel
+
+PYTHONPATH=$PYTHONPATH:$HOME/lib/python
+parallel --will-cite -j $c doC2T.sh "$1" '{}' < cdx_tab/$proc.txt
+
+echo $(date) $proc end