changeset 11:b0d9fe66ce8a

give up on mpiexec_mpt
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 25 Feb 2020 18:33:22 +0000
parents a33db8e3f51c
children 29263ba42361
files bin/doPlinks.sh bin/plinks.py bin/plinks.sh bin/plinksMaster.sh bin/test.sh plinksJob.sh testJob.sh
diffstat 7 files changed, 35 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/bin/doPlinks.sh	Tue Feb 25 14:56:36 2020 +0000
+++ b/bin/doPlinks.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -8,7 +8,7 @@
 tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.pdf'
 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2
 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\
-plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/stopJob ; exit 1 ; }
+$HOME/bin/plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/x$hn/${tfn}/stopJob ; exit 1 ; }
 echo $(date) $hn tarring $(ls badpdfs_*|wc -l)/$(ls links_*_*|wc -l) results from job $jn for $tfn in $(pwd) 1>&2
 tar -cf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar badpdfs_${tfn} links_${tfn}_*
 echo $(date) $(pwd) rm $(ls -lt badpdfs_*) 1>&2
--- a/bin/plinks.py	Tue Feb 25 14:56:36 2020 +0000
+++ b/bin/plinks.py	Tue Feb 25 18:33:22 2020 +0000
@@ -19,6 +19,7 @@
       if limited:
         print("%s\t%s\tProcessing limited after timeout"%(
           datetime.now().isoformat(),fno),file=bf)
+        bf.flush()
       if bool(links) and (links.get('scrape',False) or
                           links.get('annot',False)):
         gf+=1
@@ -30,13 +31,15 @@
       if str(e)=='Unexpected EOF':
         print("%s:\t%s\t%s\t%s"%(datetime.now().isoformat(),
                                  tarnum,fno,e),file=bf)
+        bf.flush()
       else:
         print("%s: "%(datetime.now().isoformat()),end='',file=bf)
         traceback.print_exc(file=bf)
-
-    if (path.exists('/dev/shm/stopJob')):
+        bf.flush()
+    if path.exists('stopJob'):
       print("%s: Quiting early: %s %s"%(datetime.now().isoformat(),tarnum,fno),
             file=sys.stderr)
+      sys.stderr.flush()
       exit(1)          
 now=datetime.now().isoformat()
 print('%s: exiting from %s having found %s files with links out of %s'%(now,
--- a/bin/plinks.sh	Tue Feb 25 14:56:36 2020 +0000
+++ b/bin/plinks.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -1,8 +1,8 @@
 #!/usr/bin/bash
-mkdir -p $TMPDIR
+module load miniconda/python3
 echo $(date) $(hostname)
 h=$(hostname)
 hn=${h##*n}
-if [ $hn -eq 0 ]; then echo {013..062}; else echo {063..112}; fi |\
-tr ' ' '\012' |parallel --will-cite -j 30 -N 1 doPlinks.sh ${hn} '{#}' '{}'
+if [ $hn -eq 0 ]; then echo {013..014}; else echo {015..016}; fi |\
+tr ' ' '\012' |parallel --will-cite -j 30 -N 1 bin/doPlinks.sh ${hn} '{#}' '{}'
 echo $(date) $(hostname) $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/plinksMaster.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Runs on one machine and launches the real job (bin/plinks.sh) on two worker machines
+echo $(date) Launching plinks workers
+parallel --will-cite --nonall -S r1i5n0 -S r1i5n1 bin/plinks.sh
+echo $(date) Workers done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/test.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,5 @@
+#!/bin/bash
+pwd
+parallel --will-cite --nonall -S r1i5n0 -S r1i5n1 'echo $$ $(hostname); n=$(echo $(hostname)|cut -c 6); nohup sleep $((n*5)); echo done $n'
+echo pdone
+
--- a/plinksJob.sh	Tue Feb 25 14:56:36 2020 +0000
+++ b/plinksJob.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -6,12 +6,8 @@
 #PBS -A dc007
 #PBS -N plinks
 
-module load mpt
+#module load mpt
 
 cd ${PBS_O_WORKDIR}
+bin/plinksMaster.sh
 
-export MPI_SHEPHERD=true
-#export MPI_UNBUFFERED_STDIO=true not needed anymore since debug logging pruned
-
-mpiexec_mpt -ppn 1 -n 2 bin/plinks.sh 2019-35
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testJob.sh	Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,14 @@
+#!/bin/bash
+#PBS -l select=2:ncpus=36
+#PBS -l place=exclhost
+#PBS -l walltime=08:00:00
+#PBS -V
+#PBS -A dc007
+#PBS -N plinks
+
+#module load mpt
+
+cd ${PBS_O_WORKDIR}
+bin/test.sh
+
+