comparison plinks_jto.py @ 69:157f012ffab7 default tip

from local
author Henry S Thompson <ht@inf.ed.ac.uk>
date Fri, 17 Jan 2025 15:45:26 +0000
parents
children
comparison
equal deleted inserted replaced
68:eb91fd5d49b3 69:157f012ffab7
1 #!/usr/bin/python3
2 # Needs PYTHONPATH=/group/ltg/projects/lcontrib_sl7/usr/lib/python3.4/site-packages
3 import sys,pdfx,traceback
4
5 import types
# Give pdfx.backends.Reference a Python-level ordering, but only when the
# class does not already define one: a class that merely inherits object's
# __lt__ exposes a slot wrapper there, not a plain function.
# Presumably needed so that the sort=True calls in run() can order
# References -- confirm against the pdfx version in use.
if not isinstance(pdfx.backends.Reference.__lt__, types.FunctionType):
    def __lt__(self, other):
        """Order two References by their ``ref`` attribute.

        Returns NotImplemented for any other operand type so that Python
        falls back to the reflected comparison and finally raises TypeError,
        rather than relying on an assert that disappears under ``-O``.
        """
        if not isinstance(other, pdfx.backends.Reference):
            return NotImplemented
        return self.ref < other.ref

    pdfx.backends.Reference.__lt__ = __lt__
12
# Printed after the "Failed: ..." message in run().  Nothing visible in this
# file rebinds it, so it currently shows up as "None" in that message.
E = None


def run():
    """Print the references found in the PDF named on the command line.

    Reads its arguments from ``sys.argv``: an optional leading ``-f``
    followed by the PDF to process.  With ``-f`` the references come from
    ``get_references`` and are printed one per line; without it they come
    from ``get_references_as_dict`` and are printed as ``key<TAB>reference``
    lines.  The module-level ``limited`` dict is passed to ``pdfx.PDFx`` as
    keyword arguments.

    ``sys.argv`` is left untouched, so the function can be called repeatedly
    (e.g. from ``timeit``) and sees the same options on every call.

    Side effects: binds the module-level name ``pdf`` to the PDFx object.

    Exits with status 1 (after writing a traceback and a "Failed:" line to
    stderr) when the PDF cannot be processed, or when no file was given.
    """
    global pdf

    args = sys.argv[1:]
    flatten = bool(args) and args[0] == '-f'
    if flatten:
        args = args[1:]
    if not args:
        print("\nFailed: no PDF file given", file=sys.stderr)
        sys.exit(1)
    filename = args[0]

    try:
        pdf = pdfx.PDFx(filename, **limited)
        if flatten:
            links = pdf.get_references(sort=True)
        else:
            links = pdf.get_references_as_dict(sort=True)
    except Exception:
        # Deliberately broad: any processing failure is reported and turned
        # into a non-zero exit status, but Ctrl-C and SystemExit still
        # propagate.
        traceback.print_exc()
        print("\nFailed: %s" % filename, E, file=sys.stderr)
        sys.exit(1)

    # NOTE(review): `limited` is an attribute of the pdfx build this script
    # targets; presumably set when one of the timeouts fired -- confirm.
    if pdf.limited:
        print("Timed out, no text or scraping", file=sys.stderr)

    if flatten:
        for link in links:
            print(link)
    else:
        for key, key_links in links.items():
            for link in key_links:
                print("%s\t%s" % (key, link))
42
# Command-line driver.  Options are recognised only in this fixed order,
# each consumed from sys.argv as it is handled:
#   [-v] [-r SECONDS] [-t SECONDS] [-x COUNT] <arguments for run()>

# Keyword arguments that run() forwards to pdfx.PDFx; filled in by -r / -t.
limited = {}


def _next_arg_is(flag):
    """Return True when the next unconsumed command-line argument is *flag*."""
    return len(sys.argv) > 1 and sys.argv[1] == flag


def _pop_value(flag):
    """Remove and return the argument that follows an already-consumed *flag*.

    Exits with status 2 and a message on stderr when there is none.
    """
    if len(sys.argv) < 2:
        print("Missing value for %s" % flag, file=sys.stderr)
        sys.exit(2)
    return sys.argv.pop(1)


if _next_arg_is('-v'):
    # verbose: log level debug
    sys.argv.pop(1)
    import logging
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    # NOTE(review): these loggers get their own stderr handler on top of the
    # root handler installed by basicConfig, so their records may be emitted
    # twice -- confirm that is intended.
    for logger_name in ('stopit', 'pdfx'):
        logging.getLogger(name=logger_name).addHandler(
            logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfminer').setLevel(logging.WARN)

if _next_arg_is('-r'):
    # timeout for reading
    sys.argv.pop(1)
    limited['readTimeout'] = float(_pop_value('-r'))

if _next_arg_is('-t'):
    # timeout for text recovery
    sys.argv.pop(1)
    limited['textTimeout'] = float(_pop_value('-t'))

timing_runs = None
if _next_arg_is('-x'):
    # time COUNT calls of run() instead of running it once
    import timeit
    sys.argv.pop(1)
    timing_runs = int(_pop_value('-x'))

if len(sys.argv) < 2:
    # Nothing left for run() to work on: explain instead of failing with an
    # IndexError.
    print("usage: %s [-v] [-r SECONDS] [-t SECONDS] [-x COUNT] [-f] PDF"
          % sys.argv[0], file=sys.stderr)
    sys.exit(2)

if timing_runs is None:
    run()
else:
    # The elapsed time goes to stderr so it stays separate from the
    # references run() prints on stdout.
    print(timeit.timeit("run()", number=timing_runs,
                        setup="from __main__ import run"), file=sys.stderr)