annotate bin/plinks.py @ 5:a28d731977da

merge
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 19 Feb 2020 10:41:59 +0000
parents 462179da7dc2 cbd13beb0922
children 0f494c76a887
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
65a56c0d1c1f bolting the barn door...
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/lustre/sw/miniconda3/bin/python3
65a56c0d1c1f bolting the barn door...
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 import sys,pdfx,traceback
1
a4b0359456bc switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents: 0
diff changeset
3 from os import path
a4b0359456bc switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents: 0
diff changeset
4
a4b0359456bc switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents: 0
diff changeset
def run(file):
    """Extract the references contained in the PDF named by *file*.

    The parsed ``pdfx.PDFx`` object is also bound to the module-level name
    ``pdf``, so it remains reachable after this function returns.

    Returns whatever ``get_references_as_dict()`` yields for that document.
    """
    global pdf
    pdf = pdfx.PDFx(file)
    references = pdf.get_references_as_dict()
    return references
0
65a56c0d1c1f bolting the barn door...
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
1
a4b0359456bc switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents: 0
diff changeset
def _has_wanted_links(links):
    """Return True when *links* has at least one 'scrape' or 'annot' entry."""
    return bool(links) and bool(links.get('scrape') or links.get('annot'))


def _write_links(links, out):
    """Write every reference in *links* to *out*, one tab-separated
    ``key<TAB>link`` pair per line."""
    for kind, refs in links.items():
        for ref in refs:
            print("%s\t%s" % (kind, ref), file=out)


def main(me, lines, out_dir='/dev/shm/x', stop_file='/dev/shm/stopJob',
         extract=None):
    """Extract links from every PDF listed in *lines*.

    Each input line is expected to hold a file index and a filename,
    separated by whitespace.  For each PDF whose references include
    'scrape' or 'annot' entries, all of its references are written to
    ``<out_dir>/links_<index>``.  Failures are recorded, one per line, in
    ``<out_dir>/badpdfs_<me>`` as ``<index><TAB><error>``.

    Parameters:
      me        -- identifier of this worker, used to name the bad-PDF file
                   and in the early-exit message
      lines     -- iterable of input lines (e.g. ``sys.stdin``)
      out_dir   -- directory receiving the links_* and badpdfs_* files
      stop_file -- if this path exists after a line has been handled,
                   processing stops early
      extract   -- callable mapping a filename to a dict of references;
                   defaults to this module's ``run``

    Returns 1 if processing stopped early because *stop_file* exists,
    otherwise 0.
    """
    if extract is None:
        # Resolved at call time so that this definition does not depend on
        # where ``run`` is defined in the module.
        extract = run
    with open(path.join(out_dir, 'badpdfs_%s' % me), 'w') as bad_file:
        for line in lines:
            # Split off the index only: the filename itself may contain
            # whitespace.
            fields = line.strip().split(None, 1)
            if not fields:
                continue  # blank line: nothing to process
            fno = fields[0]
            if len(fields) == 2:
                try:
                    links = extract(fields[1])
                    if _has_wanted_links(links):
                        links_name = path.join(out_dir, 'links_%s' % fno)
                        with open(links_name, 'w') as out:
                            _write_links(links, out)
                except Exception as e:
                    # One unreadable PDF must not stop the whole batch:
                    # record it and carry on.
                    print("%s\t%s" % (fno, e), file=bad_file)
            else:
                # Record a line with no filename rather than crashing on it.
                print("%s\t%s" % (fno, "malformed input line"), file=bad_file)
            if path.exists(stop_file):
                print("Quiting early: %s %s" % (me, fno), file=sys.stderr)
                return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1], sys.stdin))