Mercurial > hg > cc > cirrus_home
changeset 1:a4b0359456bc
switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Tue, 18 Feb 2020 21:33:35 +0000 |
parents | 65a56c0d1c1f |
children | 83ed7c5846b2 |
files | bin/doPlinks.sh bin/plinks.py |
diffstat | 2 files changed, 32 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bin/doPlinks.sh Tue Feb 18 13:15:05 2020 +0000
+++ b/bin/doPlinks.sh Tue Feb 18 21:33:35 2020 +0000
@@ -1,15 +1,14 @@
 #!/usr/bin/bash
-me=$1
-mine=0
-mkdir /dev/shm/x
-while read f
-do
-  if plinks.py $f > /dev/shm/x/links_${me}_${mine} 2>/dev/null
-  then
-    ((mine+=1))
-  else
-    echo $f >> /dev/shm/x/badpdfs_$me
-    rm -f /dev/shm/x/links_${me}_${mine}
-  fi
-done
+mkdir -p /dev/shm/x
+plinks.py $1
+# while read f
+# do
+#   if plinks.py $f > /dev/shm/x/links_${me}_${mine} 2>/dev/null
+#   then
+#     ((mine+=1))
+#   else
+#     echo $f >> /dev/shm/x/badpdfs_$me
+#     rm -f /dev/shm/x/links_${me}_${mine}
+#   fi
+# done
 rsync -a /dev/shm/x/ links
```
```diff
--- a/bin/plinks.py Tue Feb 18 13:15:05 2020 +0000
+++ b/bin/plinks.py Tue Feb 18 21:33:35 2020 +0000
@@ -1,24 +1,24 @@
 #!/lustre/sw/miniconda3/bin/python3
 import sys,pdfx,traceback
-def run():
+from os import path
+
+def run(file):
   global pdf
-  try:
-    pdf=pdfx.PDFx(sys.argv[1])
-    links=pdf.get_references_as_dict()
-  except:
-    traceback.print_exc()
-    print("\nFailed: %s"%sys.argv[1],file=sys.stderr)
-    exit(1)
-  for k in links.keys():
-    for l in links[k]:
-      print("%s\t%s"%(k,l))
+  pdf=pdfx.PDFx(file)
+  return pdf.get_references_as_dict()
 
-if sys.argv[1]=='-t':
-  import timeit
-  sys.argv.pop(1)
-  n=sys.argv[1]
-  sys.argv.pop(1)
-  print(timeit.timeit("run()",number=int(n),
-                      setup="from __main__ import run"),file=sys.stderr)
-else:
-  run()
+me=sys.argv[1]
+with open('/dev/shm/x/badpdfs_%s'%me,'w') as bf:
+  for l in sys.stdin:
+    (fno,f)=l.rstrip().split()
+    try:
+      links=run(file)
+      with open('/dev/shm/x/links_%s'%fno,'w') as of:
+        for k in links.keys():
+          for l in links[k]:
+            print("%s\t%s\t%s"%(k,l),file=of)
+    except Exception as e:
+      print("%s\t%s\t%s"%(fno,e),file=bf)
+    if (path.exists('/dev/shm/stopJob')):
+      print("Quiting early: %s %s"%(me,fno),file=sys.stderr)
+      exit(1)
```