Mercurial > hg > python
comparison plinks_jto.py @ 69:157f012ffab7 default tip
from local
author | Henry S Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 17 Jan 2025 15:45:26 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
68:eb91fd5d49b3 | 69:157f012ffab7 |
---|---|
1 #!/usr/bin/python3 | |
2 # Needs PYTHONPATH=/group/ltg/projects/lcontrib_sl7/usr/lib/python3.4/site-packages | |
3 import sys,pdfx,traceback | |
4 | |
5 import types | |
# Give pdfx's Reference class an ordering when it has none of its own.
# The default comparison inherited from ``object`` is a C-level slot wrapper
# rather than a plain Python function, so the isinstance() test below is
# true exactly when no Python-level __lt__ has been defined for the class.
# Presumably the ordering is what the sort=True calls in run() rely on --
# confirm against the pdfx build in use.
if not isinstance(pdfx.backends.Reference.__lt__, types.FunctionType):
    def __lt__(self, other):
        """Order two Reference objects by their ``ref`` attribute.

        Returns NotImplemented for any other operand type so that Python
        can try the reflected comparison or raise TypeError itself; an
        ``assert`` here would vanish under ``python -O``.
        """
        if not isinstance(other, pdfx.backends.Reference):
            return NotImplemented
        return self.ref < other.ref

    pdfx.backends.Reference.__lt__ = __lt__
12 | |
# Extra value printed after the file name in run()'s failure message;
# nothing visible in this file ever sets it to anything but None.
E = None
14 | |
def run():
    """Print the references found in the PDF named on the command line.

    Expects ``sys.argv`` to hold, after the program name, an optional
    ``-f`` flag followed by the PDF path (any earlier options having
    already been consumed by the module-level code).

    With ``-f`` the result of ``get_references(sort=True)`` is printed one
    item per line; otherwise ``get_references_as_dict(sort=True)`` is
    printed as ``key<TAB>item`` lines.

    The opened document is stored in the module-level name ``pdf``.
    If opening the file or extracting references raises, the traceback and
    a "Failed" line go to stderr and the process exits with status 1.
    """
    global pdf
    # Inspect the arguments without popping them: this function may be
    # called repeatedly (the -x timing option does so), and consuming
    # '-f' on the first call would silently switch later calls to the
    # non-flattened mode.
    flatten = sys.argv[1] == '-f'
    path = sys.argv[2] if flatten else sys.argv[1]
    try:
        pdf = pdfx.PDFx(path, **limited)
        if flatten:
            links = pdf.get_references(sort=True)
        else:
            links = pdf.get_references_as_dict(sort=True)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not reported as extraction failures.
        traceback.print_exc()
        print("\nFailed: %s" % path, E, file=sys.stderr)
        sys.exit(1)
    # NOTE(review): pdf.limited is presumably set by PDFx when one of the
    # timeouts in `limited` cut processing short -- the message says so,
    # but confirm against the pdfx build in use.
    if pdf.limited:
        print("Timed out, no text or scraping", file=sys.stderr)
    if flatten:
        for link in links:
            print(link)
    else:
        for key, group in links.items():
            for link in group:
                print("%s\t%s" % (key, link))
42 | |
# Command-line driver.  Options are recognised only in this fixed order,
# each being removed from sys.argv before the next one is examined:
#   [-v] [-r SECONDS] [-t SECONDS] [-x COUNT] [-f] FILE
# -f and FILE are left in sys.argv for run() to read.
limited = {}  # keyword arguments that run() forwards to pdfx.PDFx


def _usage_error(problem):
    """Report *problem* plus a usage line on stderr and exit with status 2."""
    print("%s: %s" % (sys.argv[0], problem), file=sys.stderr)
    print("usage: %s [-v] [-r SECONDS] [-t SECONDS] [-x COUNT] [-f] FILE"
          % sys.argv[0], file=sys.stderr)
    sys.exit(2)


def _next_arg_is(flag):
    """Return True when the next unconsumed argument exists and equals *flag*."""
    return len(sys.argv) > 1 and sys.argv[1] == flag


def _take_option_value(flag, convert):
    """Remove *flag* and the value after it from sys.argv; return convert(value)."""
    sys.argv.pop(1)  # the flag itself
    if len(sys.argv) < 2:
        _usage_error("%s needs a value" % flag)
    raw = sys.argv.pop(1)
    try:
        return convert(raw)
    except ValueError:
        _usage_error("bad value for %s: %r" % (flag, raw))


if _next_arg_is('-v'):
    # verbose: log level debug
    sys.argv.pop(1)
    import logging
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.getLogger(name='stopit').addHandler(
        logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfx').addHandler(
        logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfminer').setLevel(logging.WARN)

if _next_arg_is('-r'):
    # timeout for reading
    limited['readTimeout'] = _take_option_value('-r', float)
if _next_arg_is('-t'):
    # timeout for text recovery
    limited['textTimeout'] = _take_option_value('-t', float)

# -x COUNT: time COUNT calls of run() instead of making a single call.
repeat = _take_option_value('-x', int) if _next_arg_is('-x') else None

# Whatever is left must be FILE, optionally preceded by -f.
if len(sys.argv) < 2 or sys.argv[1:] == ['-f']:
    _usage_error("missing PDF file name")

if repeat is not None:
    import timeit
    # Passing the callable avoids having to re-import run from __main__.
    print(timeit.timeit(run, number=repeat), file=sys.stderr)
else:
    run()