Mercurial > hg > cc > cirrus_home
comparison bin/plinks.py @ 6:0f494c76a887
refactor to address tarred-up pdfs
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 23 Feb 2020 16:48:34 +0000 |
parents | a28d731977da |
children | 25ca3505b4d7 |
comparison
equal
deleted
inserted
replaced
5:a28d731977da | 6:0f494c76a887 |
---|---|
5 def run(file): | 5 def run(file): |
6 global pdf | 6 global pdf |
7 pdf=pdfx.PDFx(file) | 7 pdf=pdfx.PDFx(file) |
8 return pdf.get_references_as_dict() | 8 return pdf.get_references_as_dict() |
9 | 9 |
10 me=sys.argv[1] | 10 tarnum=sys.argv[1] |
11 with open('/dev/shm/x/badpdfs_%s'%me,'w') as bf: | 11 with open('badpdfs_%s'%tarnum,'w') as bf: |
12 for l in sys.stdin: | 12 for l in sys.stdin: |
13 (fno,f)=l.rstrip().split() | 13 (fno,f)=l.split() |
14 try: | 14 try: |
15 links=run(f) | 15 links=run(f) |
16 if bool(links) and (links.get('scrape',False) or | 16 if bool(links) and (links.get('scrape',False) or |
17 links.get('annot',False)): | 17 links.get('annot',False)): |
18 with open('/dev/shm/x/links_%s'%fno,'w') as of: | 18 with open('links_%s_%s'%(tarnum,fno),'w') as of: |
19 for k in links.keys(): | 19 for k in links.keys(): |
20 for l in links[k]: | 20 for l in links[k]: |
21 print("%s\t%s"%(k,l),file=of) | 21 print("%s\t%s"%(k,l),file=of) |
22 except Exception as e: | 22 except Exception as e: |
23 print("%s\t%s"%(fno,e),file=bf) | 23 if str(e)=='Unexpected EOF': |
 | 24 print("%s\t%s\t%s"%(tarnum,fno,e),file=bf) |
 | 25 else: |
 | 26 traceback.print_exc(file=bf) |
 | 27 |
24 if (path.exists('/dev/shm/stopJob')): | 28 if (path.exists('/dev/shm/stopJob')): |
25 print("Quiting early: %s %s"%(me,fno),file=sys.stderr) | 29 print("Quiting early: %s %s"%(tarnum,fno),file=sys.stderr) |
26 exit(1) | 30 exit(1) |