Mercurial > hg > cc > cirrus_home
changeset 11:b0d9fe66ce8a
give up on mpiexec_mpt
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 25 Feb 2020 18:33:22 +0000 |
parents | a33db8e3f51c |
children | 29263ba42361 |
files | bin/doPlinks.sh bin/plinks.py bin/plinks.sh bin/plinksMaster.sh bin/test.sh plinksJob.sh testJob.sh |
diffstat | 7 files changed, 35 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/doPlinks.sh Tue Feb 25 14:56:36 2020 +0000 +++ b/bin/doPlinks.sh Tue Feb 25 18:33:22 2020 +0000 @@ -8,7 +8,7 @@ tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.pdf' echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\ -plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/stopJob ; exit 1 ; } +$HOME/bin/plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/x$hn/${tfn}/stopJob ; exit 1 ; } echo $(date) $hn tarring $(ls badpdfs_*|wc -l)/$(ls links_*_*|wc -l) results from job $jn for $tfn in $(pwd) 1>&2 tar -cf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar badpdfs_${tfn} links_${tfn}_* echo $(date) $(pwd) rm $(ls -lt badpdfs_*) 1>&2
--- a/bin/plinks.py Tue Feb 25 14:56:36 2020 +0000 +++ b/bin/plinks.py Tue Feb 25 18:33:22 2020 +0000 @@ -19,6 +19,7 @@ if limited: print("%s\t%s\tProcessing limited after timeout"%( datetime.now().isoformat(),fno),file=bf) + bf.flush() if bool(links) and (links.get('scrape',False) or links.get('annot',False)): gf+=1 @@ -30,13 +31,15 @@ if str(e)=='Unexpected EOF': print("%s:\t%s\t%s\t%s"%(datetime.now().isoformat(), tarnum,fno,e),file=bf) + bf.flush() else: print("%s: "%(datetime.now().isoformat()),end='',file=bf) traceback.print_exc(file=bf) - - if (path.exists('/dev/shm/stopJob')): + bf.flush() + if path.exists('stopJob'): print("%s: Quiting early: %s %s"%(datetime.now().isoformat(),tarnum,fno), file=sys.stderr) + sys.stderr.flush() exit(1) now=datetime.now().isoformat() print('%s: exiting from %s having found %s files with links out of %s'%(now,
--- a/bin/plinks.sh Tue Feb 25 14:56:36 2020 +0000
+++ b/bin/plinks.sh Tue Feb 25 18:33:22 2020 +0000
@@ -1,8 +1,8 @@
 #!/usr/bin/bash
-mkdir -p $TMPDIR
+module load miniconda/python3
 echo $(date) $(hostname)
 h=$(hostname)
 hn=${h##*n}
-if [ $hn -eq 0 ]; then echo {013..062}; else echo {063..112}; fi |\
-tr ' ' '\012' |parallel --will-cite -j 30 -N 1 doPlinks.sh ${hn} '{#}' '{}'
+if [ $hn -eq 0 ]; then echo {013..014}; else echo {015..016}; fi |\
+tr ' ' '\012' |parallel --will-cite -j 30 -N 1 bin/doPlinks.sh ${hn} '{#}' '{}'
 echo $(date) $(hostname) $?
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/plinksMaster.sh Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,5 @@
+#!/bin/bash
+# This runs on 1 machine to launch the real job on two machines
+echo $(date) Launching plinks workers
+parallel --will-cite --nonall -S r1i5n0 -S r1i5n1 bin/plinks.sh
+echo $(date) Workers done
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/test.sh Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,5 @@
+#!/bin/bash
+pwd
+parallel --will-cite --nonall -S r1i5n0 -S r1i5n1 'echo $$ $(hostname); n=$(echo $(hostname)|cut -c 6); nohup sleep $((n*5)); echo done $n'
+echo pdone
+
--- a/plinksJob.sh Tue Feb 25 14:56:36 2020 +0000 +++ b/plinksJob.sh Tue Feb 25 18:33:22 2020 +0000 @@ -6,12 +6,8 @@ #PBS -A dc007 #PBS -N plinks -module load mpt +#module load mpt cd ${PBS_O_WORKDIR} +bin/plinksMaster.sh -export MPI_SHEPHERD=true -#export MPI_UNBUFFERED_STDIO=true not needed anymore since debug logging pruned - -mpiexec_mpt -ppn 1 -n 2 bin/plinks.sh 2019-35 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testJob.sh Tue Feb 25 18:33:22 2020 +0000
@@ -0,0 +1,14 @@
+#!/bin/bash
+#PBS -l select=2:ncpus=36
+#PBS -l place=exclhost
+#PBS -l walltime=08:00:00
+#PBS -V
+#PBS -A dc007
+#PBS -N plinks
+
+#module load mpt
+
+cd ${PBS_O_WORKDIR}
+bin/test.sh
+
+