view bin/plinks.py @ 1:a4b0359456bc

switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
author Henry Thompson <ht@markup.co.uk>
date Tue, 18 Feb 2020 21:33:35 +0000
parents 65a56c0d1c1f
children 83ed7c5846b2
line wrap: on
line source

#!/lustre/sw/miniconda3/bin/python3
import sys,pdfx,traceback
from os import path

def run(file):
  """Open `file` with pdfx and return its references as a dict.

  The PDFx object is also left in the module-level global `pdf`
  (presumably so the most recent document can be inspected after a
  failure -- confirm before removing).
  """
  global pdf
  # Rebinding the global replaces the object from the previous call.
  pdf = pdfx.PDFx(file)
  references = pdf.get_references_as_dict()
  return references

# Driver: read "<file index> <filename>" lines from stdin, extract the
# references from each file via run(), and write them to a per-file output
# under /dev/shm/x.  Files that fail are recorded in a per-worker log.
#
# sys.argv[1] is an identifier for this worker; it names the bad-PDF log
# and tags the early-exit message.
me=sys.argv[1]
with open('/dev/shm/x/badpdfs_%s'%me,'w') as bf:
  for line in sys.stdin:
    if not line.strip():
      # Tolerate blank lines (e.g. a trailing newline in the pipe) rather
      # than dying on the tuple unpack below.
      continue
    # Split on the first run of whitespace only, so a filename that itself
    # contains spaces stays in one piece.
    (fno,f)=line.rstrip().split(None,1)
    try:
      # Pass the filename parsed from this line (the original passed the
      # undefined name `file`, so every input failed with NameError).
      links=run(f)
      with open('/dev/shm/x/links_%s'%fno,'w') as of:
        # assumes links maps a reference type to an iterable of
        # references -- TODO confirm against pdfx
        for k,refs in links.items():
          for ref in refs:
            # Three columns: file index, reference type, reference.
            # NOTE(review): the original format had three fields but only
            # supplied (k,l); fno is assumed to be the missing first column.
            print("%s\t%s\t%s"%(fno,k,ref),file=of)
    except Exception as e:
      # Deliberately broad: one unreadable file must not stop the batch.
      # Three columns: file index, filename, error (the original supplied
      # only two values for three fields, which raised TypeError from
      # inside this handler and aborted the whole run).
      print("%s\t%s\t%s"%(fno,f,e),file=bf)
    # Checked after every file: the presence of this path requests a stop.
    if path.exists('/dev/shm/stopJob'):
      print("Quiting early: %s %s"%(me,fno),file=sys.stderr)
      # sys.exit rather than the site-provided exit(), which is not
      # guaranteed to exist (e.g. under python -S).
      sys.exit(1)