Mercurial > hg > python
view plinks_jto.py @ 69:157f012ffab7 default tip
from local
author | Henry S Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 17 Jan 2025 15:45:26 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/python3
# Needs PYTHONPATH=/group/ltg/projects/lcontrib_sl7/usr/lib/python3.4/site-packages
"""Print the references that pdfx extracts from a PDF file.

Usage: plinks_jto.py [-v] [-r SECS] [-t SECS] [-x N] [-f] FILE

Options are only recognised in the order shown above:
  -v       DEBUG-level logging; stopit and pdfx loggers also get a stderr
           handler, pdfminer is limited to warnings
  -r SECS  timeout for reading (passed to pdfx.PDFx as readTimeout)
  -t SECS  timeout for text recovery (passed to pdfx.PDFx as textTimeout)
  -x N     call run() N times under timeit, print the elapsed time on stderr
  -f       flatten: one reference per line instead of "key<TAB>reference"
"""
import sys
import traceback
import types

import pdfx

# Install an ordering on Reference unless the class already has one written
# in Python.  Presumably this is what lets sort=True work below -- TODO confirm
# against the pdfx version in use.
if not isinstance(getattr(pdfx.backends.Reference, '__lt__'),
                  types.FunctionType):
    def __lt__(self, other):
        """Order References by their ``ref`` attribute."""
        if not isinstance(other, pdfx.backends.Reference):
            # Let Python try the reflected comparison / raise TypeError;
            # unlike an assert this still works under ``python -O``.
            return NotImplemented
        return self.ref < other.ref
    pdfx.backends.Reference.__lt__ = __lt__

E = None

_USAGE = "Usage: %s [-v] [-r SECS] [-t SECS] [-x N] [-f] FILE" % sys.argv[0]

# Keyword arguments forwarded to pdfx.PDFx (filled in by -r / -t).
limited = {}

# Set once -f has been seen.  Kept outside run() because run() may be called
# repeatedly by timeit, and the flag is removed from sys.argv the first time.
_flatten = False


def _take_flag(flag):
    """Remove *flag* from the front of the arguments; return True if it was there."""
    if len(sys.argv) > 1 and sys.argv[1] == flag:
        sys.argv.pop(1)
        return True
    return False


def _take_value(flag, convert):
    """Pop the value following *flag* and convert it, or exit with a usage message."""
    try:
        return convert(sys.argv.pop(1))
    except (IndexError, ValueError):
        sys.exit("%s needs a numeric argument\n%s" % (flag, _USAGE))


def run():
    """Extract the references from the PDF named by sys.argv[1] and print them.

    Without -f each output line is "key<TAB>reference", using the dict
    returned by get_references_as_dict; with -f each line is one reference
    from get_references.  On any extraction error the traceback and a
    "Failed:" line go to stderr and the process exits with status 1.
    The PDFx object is left in the module global ``pdf``.
    """
    global pdf, limited, _flatten
    if _take_flag('-f'):  # flatten
        _flatten = True
    if len(sys.argv) < 2:
        sys.exit(_USAGE)
    try:
        pdf = pdfx.PDFx(sys.argv[1], **limited)
        if _flatten:
            links = pdf.get_references(sort=True)
        else:
            links = pdf.get_references_as_dict(sort=True)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit should not be reported
        # as a failure of the PDF itself.
        traceback.print_exc()
        print("\nFailed: %s" % sys.argv[1], E, file=sys.stderr)
        sys.exit(1)
    if pdf.limited:
        print("Timed out, no text or scraping", file=sys.stderr)
    if _flatten:
        for link in links:
            print(link)
    else:
        for key, refs in links.items():
            for link in refs:
                print("%s\t%s" % (key, link))


if _take_flag('-v'):
    # verbose: log level debug
    import logging
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.getLogger(name='stopit').addHandler(logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfx').addHandler(logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfminer').setLevel(logging.WARNING)
if _take_flag('-r'):
    # timeout for reading
    limited['readTimeout'] = _take_value('-r', float)
if _take_flag('-t'):
    # timeout for text recovery
    limited['textTimeout'] = _take_value('-t', float)
if _take_flag('-x'):
    import timeit
    repeats = _take_value('-x', int)
    # Passing the callable avoids the "from __main__ import run" setup string.
    print(timeit.timeit(run, number=repeats), file=sys.stderr)
else:
    run()