69
|
#!/usr/bin/python3
# Needs PYTHONPATH=/group/ltg/projects/lcontrib_sl7/usr/lib/python3.4/site-packages
|
|
3 import sys,pdfx,traceback
|
|
4
|
|
5 import types
|
|
# Install a Python-level ordering on pdfx.backends.Reference, unless the class
# already defines __lt__ as a plain function.  (Presumably required by the
# sort=True calls made in run() -- confirm against the installed pdfx.)
if not isinstance(getattr(pdfx.backends.Reference, '__lt__'), types.FunctionType):
    def __lt__(self, other):
        """Order two References by their ``ref`` attribute.

        Returns NotImplemented for any other operand type so that Python can
        try the reflected comparison and otherwise raise TypeError.  This
        replaces an ``assert``, which is stripped under ``python -O``.
        """
        if not isinstance(other, pdfx.backends.Reference):
            return NotImplemented
        return self.ref < other.ref

    pdfx.backends.Reference.__lt__ = __lt__
|
|
12
|
|
# Extra value printed after the file name in run()'s failure message.  Nothing
# in the visible code reassigns it, so that message currently ends in "None".
E=None
|
|
14
|
|
def run():
    """Extract the references from the PDF named on the command line and print them.

    Reads ``sys.argv``: an optional leading ``-f`` selects flattened output,
    and the next argument is the PDF to open.  The module-level ``limited``
    dict is forwarded to ``pdfx.PDFx`` as keyword arguments, and the opened
    document is stored in the module global ``pdf``.

    Output (stdout):
      * with ``-f``: one reference per line, from ``get_references``;
      * otherwise: ``key<TAB>reference`` lines, from ``get_references_as_dict``.

    On any failure while opening or scanning the PDF, the traceback and a
    "Failed" line go to stderr and the process exits with status 1.
    """
    global pdf, limited
    # Inspect the flag without popping it: under "-x N" this function is
    # called N times by timeit, and popping '-f' on the first call made every
    # later call silently run in non-flattened mode.
    flatten = sys.argv[1] == '-f'
    path = sys.argv[2] if flatten else sys.argv[1]
    try:
        pdf = pdfx.PDFx(path, **limited)
        if flatten:
            links = pdf.get_references(sort=True)
        else:
            links = pdf.get_references_as_dict(sort=True)
    except Exception:
        # Exception rather than a bare except, so that Ctrl-C and SystemExit
        # are not reported as a PDF failure.
        traceback.print_exc()
        print("\nFailed: %s" % path, E, file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is absent
        # when Python is started with -S.
        sys.exit(1)
    if pdf.limited:
        print("Timed out, no text or scraping", file=sys.stderr)
    if flatten:
        for link in links:
            print(link)
    else:
        for key, refs in links.items():
            for link in refs:
                print("%s\t%s" % (key, link))
|
|
42
|
|
# Keyword arguments that run() forwards to pdfx.PDFx; filled in by -r / -t.
limited = {}


def _usage():
    """Print the command-line synopsis to stderr and exit with status 2."""
    print("Usage: %s [-v] [-r TIMEOUT] [-t TIMEOUT] [-x COUNT] [-f] FILE"
          % sys.argv[0], file=sys.stderr)
    sys.exit(2)


def _pop_option_value(convert):
    """Remove the flag at sys.argv[1], then pop and convert the value after it.

    A missing or unparsable value ends the program with the usage message
    instead of an IndexError/ValueError traceback.
    """
    sys.argv.pop(1)
    try:
        return convert(sys.argv.pop(1))
    except (IndexError, ValueError):
        _usage()


# Options are positional and are recognised only in the order shown in
# _usage().  Each test compares the slice sys.argv[1:2] rather than indexing
# sys.argv[1], so running out of arguments cannot raise IndexError.
if sys.argv[1:2] == ['-v']:
    # verbose: log level debug
    sys.argv.pop(1)
    import logging
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.getLogger(name='stopit').addHandler(logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfx').addHandler(logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfminer').setLevel(logging.WARN)

if sys.argv[1:2] == ['-r']:
    # timeout for reading
    limited['readTimeout'] = _pop_option_value(float)
if sys.argv[1:2] == ['-t']:
    # timeout for text recovery
    limited['textTimeout'] = _pop_option_value(float)

# -x COUNT: time COUNT calls of run() and report the total on stderr.
repeats = None
if sys.argv[1:2] == ['-x']:
    repeats = _pop_option_value(int)

# What remains must be "[-f] FILE"; run() reads it straight from sys.argv.
if sys.argv[1:] in ([], ['-f']):
    _usage()

if repeats is not None:
    import timeit
    print(timeit.timeit("run()", number=repeats,
                        setup="from __main__ import run"), file=sys.stderr)
else:
    run()
|