Mercurial > hg > cc > cirrus_home
annotate bin/plinks.py @ 5:a28d731977da
merge
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 19 Feb 2020 10:41:59 +0000 |
parents | 462179da7dc2 cbd13beb0922 |
children | 0f494c76a887 |
rev | line source |
---|---|
0 | 1 #!/lustre/sw/miniconda3/bin/python3 |
2 import sys,pdfx,traceback | |
1
a4b0359456bc
switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents:
0
diff
changeset
|
3 from os import path |
a4b0359456bc
switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents:
0
diff
changeset
|
4 |
a4b0359456bc
switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents:
0
diff
changeset
|
def run(file):
    """Open *file* with pdfx and return its references as a dict.

    The PDFx object is also stored in the module-level global ``pdf``,
    so the most recently opened document remains reachable after the
    call (presumably for post-mortem inspection -- confirm with the
    author before removing).
    """
    global pdf
    pdf = pdfx.PDFx(file)
    references = pdf.get_references_as_dict()
    return references
0 | 9 |
1
a4b0359456bc
switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
Henry Thompson <ht@markup.co.uk>
parents:
0
diff
changeset
|
def has_links(links):
    """Return True when *links* has a non-empty 'scrape' or 'annot' entry.

    *links* is the value returned by ``run``; a falsy value (None, empty
    dict) counts as "no links".
    """
    return bool(links) and bool(links.get('scrape', False) or
                                links.get('annot', False))


def link_lines(links):
    """Yield one ``key<TAB>link`` string for every link under every key."""
    for kind, found in links.items():
        for link in found:
            yield "%s\t%s" % (kind, link)


def main():
    """Extract links from every PDF named on stdin.

    ``sys.argv[1]`` is a worker identifier used to name this worker's
    error log, ``/dev/shm/x/badpdfs_<id>``.  Each stdin line carries a
    file index and a filename separated by whitespace.  For each file
    whose references contain 'scrape' or 'annot' entries, a
    ``/dev/shm/x/links_<index>`` file of ``key<TAB>link`` lines is
    written.  Any exception raised while handling a file is logged as
    ``index<TAB>message`` in the error log and processing continues.

    After each file the existence of ``/dev/shm/stopJob`` is checked;
    if it exists the process reports to stderr and exits with status 1.
    """
    me = sys.argv[1]
    with open('/dev/shm/x/badpdfs_%s' % me, 'w') as bf:
        for line in sys.stdin:
            # Split only on the first run of whitespace so a filename that
            # itself contains spaces no longer aborts the whole worker.
            fields = line.rstrip().split(None, 1)
            if not fields:
                # Blank line: nothing to process.
                continue
            if len(fields) != 2:
                # Index without a filename: record it and carry on rather
                # than dying with an unpacking ValueError.
                print("%s\t%s" % (fields[0], "malformed input line"),
                      file=bf)
                continue
            fno, f = fields
            try:
                links = run(f)
                # Only create a links file when there is something to put
                # in it, so no empty files are left behind.
                if has_links(links):
                    with open('/dev/shm/x/links_%s' % fno, 'w') as of:
                        for row in link_lines(links):
                            print(row, file=of)
            except Exception as e:
                # Deliberately broad: one bad PDF must not stop the batch.
                print("%s\t%s" % (fno, e), file=bf)
            if path.exists('/dev/shm/stopJob'):
                print("Quiting early: %s %s" % (me, fno), file=sys.stderr)
                # sys.exit, not the site-provided exit(), which is meant
                # for interactive use and is absent under ``python -S``.
                sys.exit(1)


if __name__ == '__main__':
    main()