#!/usr/bin/env python3
# Provenance (from the HG changeset patch that introduced this script):
#   User:    Henry S. Thompson
#   Date:    1659877009 -3600  (Sun Aug 07 13:56:49 2022 +0100)
#   Node ID: 04464ee31d66ac616a8a1dc4b73ee6d6bfebd7b3
#   Parent:  a9763cd189497e3fe4243516cc52907af17f9def
#   Summary: toward link extractions from pdf
#   File:    bin/plinks.py (new file, diff -r a9763cd18949 -r 04464ee31d66)
"""Print the references pdfx finds in a PDF, one per line.

Usage: plinks.py FILE

Each output line on stdout is ``<key>\\t<reference>``, where ``<key>`` is a
key of the dict returned by ``PDFx.get_references_as_dict()``.  Failures
while processing FILE are reported on stderr, prefixed with an ISO
timestamp, and do not change the exit status.
"""
import sys
import traceback
from datetime import datetime


def run(file):
    """Open *file* with pdfx and return its references as a dict.

    The parsed document is also bound to the module-level name ``pdf``.
    Any exception raised by pdfx propagates to the caller.
    """
    # NOTE(review): presumably kept global so the parsed document can be
    # inspected after a run (e.g. under ``python -i``) -- confirm before
    # removing.
    global pdf
    # Imported lazily so that argument errors can be reported without
    # loading pdfx; an import failure is then logged by main() like any
    # other processing failure.
    import pdfx

    pdf = pdfx.PDFx(file)
    return pdf.get_references_as_dict()


def _print_links(links):
    """Write every ``key<TAB>reference`` pair in *links* to stdout."""
    # Output is produced only when the 'scrape' or 'annot' entry is
    # non-empty; once that holds, the entries of *all* keys are printed.
    if links and (links.get('scrape') or links.get('annot')):
        for kind, refs in links.items():
            for ref in refs:
                print(kind, ref, sep="\t")


def main(argv):
    """Run the script on ``argv[1]`` and return the process exit status.

    Returns 2 (after printing a usage line to stderr) when no file
    argument is given, and 0 otherwise.  Errors raised while extracting
    or printing links are deliberately best-effort: they are logged to
    stderr and do not affect the exit status.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "plinks.py"
        print(f"usage: {prog} FILE", file=sys.stderr)
        return 2

    try:
        _print_links(run(argv[1]))
    except Exception as e:
        stamp = datetime.now().isoformat()
        if str(e) == 'Unexpected EOF':
            # Truncated input is reported as a single line, with no
            # traceback.
            print(f"{stamp}:\t{e}", file=sys.stderr)
        else:
            print(f"{stamp}: ", end='', file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))