Mercurial > hg > python
diff plinks_jto.py @ 69:157f012ffab7 default tip
from local
author | Henry S Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 17 Jan 2025 15:45:26 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/python3
# Needs PYTHONPATH=/group/ltg/projects/lcontrib_sl7/usr/lib/python3.4/site-packages
"""Print the references that pdfx extracts from a PDF file.

Usage: plinks_jto.py [-v] [-r SECS] [-t SECS] [-x N] [-f] FILE

Options are recognised only in the order shown above:

  -v       configure logging at DEBUG level and attach stderr handlers to
           the 'stopit' and 'pdfx' loggers ('pdfminer' is held at WARN)
  -r SECS  pass readTimeout=SECS to pdfx.PDFx (timeout for reading)
  -t SECS  pass textTimeout=SECS to pdfx.PDFx (timeout for text recovery)
  -x N     call run() N times under timeit and print the elapsed time
           to stderr
  -f       flatten: print one reference per line instead of
           "key<TAB>reference" lines
"""
import sys
import traceback
import types

import pdfx

if not isinstance(getattr(pdfx.backends.Reference, '__lt__'),
                  types.FunctionType):
    # Reference has no Python-level __lt__ of its own, so install one.
    # Presumably this is what lets the sort=True calls in run() order
    # References -- confirm against the installed pdfx.
    def __lt__(self, other):
        """Order two References by their ``ref`` attribute."""
        if not isinstance(other, pdfx.backends.Reference):
            # Let Python try the reflected comparison / raise TypeError,
            # rather than relying on an assert that -O would strip.
            return NotImplemented
        return self.ref < other.ref

    pdfx.backends.Reference.__lt__ = __lt__

# Printed after the file name in the failure message of run(); it is
# never reassigned in this script, so it always prints as "None".
E = None


def _usage_exit():
    """Print a one-line usage summary to stderr and exit with status 2."""
    print("Usage: %s [-v] [-r secs] [-t secs] [-x n] [-f] file" % sys.argv[0],
          file=sys.stderr)
    sys.exit(2)


def _take_flag(flag):
    """Pop *flag* from sys.argv[1] if it is there; return whether it was."""
    if len(sys.argv) > 1 and sys.argv[1] == flag:
        sys.argv.pop(1)
        return True
    return False


def _take_value():
    """Pop and return the next command-line argument, or exit with usage."""
    if len(sys.argv) < 2:
        _usage_exit()
    return sys.argv.pop(1)


def run():
    """Extract and print the references of the PDF named on the command line.

    Reads sys.argv without modifying it, so repeated calls (as made by
    the -x timing mode) all see the same -f flag and file name.  The
    pdfx.PDFx object is left in the global ``pdf``.

    On any failure while opening the file or collecting references the
    traceback and a "Failed: ..." line go to stderr and the process
    exits with status 1.  With no file argument, a usage line is printed
    and the process exits with status 2.
    """
    global pdf
    args = sys.argv[1:]
    flatten = bool(args) and args[0] == '-f'
    if flatten:
        args = args[1:]
    if not args:
        _usage_exit()
    path = args[0]
    try:
        pdf = pdfx.PDFx(path, **limited)
        if flatten:
            links = pdf.get_references(sort=True)
        else:
            links = pdf.get_references_as_dict(sort=True)
    except Exception:
        # Exception, not a bare except: Ctrl-C and SystemExit must not be
        # reported as a failure to process the file.
        traceback.print_exc()
        print("\nFailed: %s" % path, E, file=sys.stderr)
        sys.exit(1)
    if pdf.limited:
        print("Timed out, no text or scraping", file=sys.stderr)
    if flatten:
        for link in links:
            print(link)
    else:
        for key, refs in links.items():
            for link in refs:
                print("%s\t%s" % (key, link))


# Keyword arguments forwarded to pdfx.PDFx by run(); filled in by -r / -t.
limited = {}
if _take_flag('-v'):
    # verbose: log level debug
    import logging
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.getLogger(name='stopit').addHandler(
        logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfx').addHandler(
        logging.StreamHandler(sys.stderr))
    logging.getLogger(name='pdfminer').setLevel(logging.WARN)

if _take_flag('-r'):
    # timeout for reading
    limited['readTimeout'] = float(_take_value())
if _take_flag('-t'):
    # timeout for text recovery
    limited['textTimeout'] = float(_take_value())

if _take_flag('-x'):
    # timing mode: run() is imported back from __main__ by timeit
    import timeit
    n = _take_value()
    print(timeit.timeit("run()", number=int(n),
                        setup="from __main__ import run"), file=sys.stderr)
else:
    run()