comparison bin/plinks.py @ 6:0f494c76a887

refactor to address tarred-up pdfs
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 23 Feb 2020 16:48:34 +0000
parents a28d731977da
children 25ca3505b4d7
comparison
legend: equal | deleted | inserted | replaced
5:a28d731977da 6:0f494c76a887
5 def run(file): 5 def run(file):
6 global pdf 6 global pdf
7 pdf=pdfx.PDFx(file) 7 pdf=pdfx.PDFx(file)
8 return pdf.get_references_as_dict() 8 return pdf.get_references_as_dict()
9 9
10 me=sys.argv[1] 10 tarnum=sys.argv[1]
11 with open('/dev/shm/x/badpdfs_%s'%me,'w') as bf: 11 with open('badpdfs_%s'%tarnum,'w') as bf:
12 for l in sys.stdin: 12 for l in sys.stdin:
13 (fno,f)=l.rstrip().split() 13 (fno,f)=l.split()
14 try: 14 try:
15 links=run(f) 15 links=run(f)
16 if bool(links) and (links.get('scrape',False) or 16 if bool(links) and (links.get('scrape',False) or
17 links.get('annot',False)): 17 links.get('annot',False)):
18 with open('/dev/shm/x/links_%s'%fno,'w') as of: 18 with open('links_%s_%s'%(tarnum,fno),'w') as of:
19 for k in links.keys(): 19 for k in links.keys():
20 for l in links[k]: 20 for l in links[k]:
21 print("%s\t%s"%(k,l),file=of) 21 print("%s\t%s"%(k,l),file=of)
22 except Exception as e: 22 except Exception as e:
23 print("%s\t%s"%(fno,e),file=bf) 23 if str(e)=='Unexpected EOF':
24 print("%s\t%s\t%s"%(tarnum,fno,e),file=bf)
25 else:
26 traceback.print_exc(file=bf)
27
24 if (path.exists('/dev/shm/stopJob')): 28 if (path.exists('/dev/shm/stopJob')):
25 print("Quiting early: %s %s"%(me,fno),file=sys.stderr) 29 print("Quiting early: %s %s"%(tarnum,fno),file=sys.stderr)
26 exit(1) 30 exit(1)