Mercurial > hg > cc > cirrus_work
diff lib/python/plinks.py @ 120:1d1bd22124c0
moved from bin
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 28 Sep 2023 08:46:01 +0100 |
parents | bin/plinks.py@38bab758e469 |
children |
line wrap: on
line diff
#!/usr/bin/env python3
"""Print the links that pdfx finds in PDF files (lib/python/plinks.py).

Usage:
    plinks.py FILE    process a single PDF file
    plinks.py -       read one file name per line from stdin and process
                      each in turn, deleting the file afterwards

For each file, stdout receives either one "<kind>\\t<link>" line per
reference, or a single status line: "None" (no scrape/annot links),
"badpdf" (pdfx reported 'Unexpected EOF') or "truncated" (batch mode only).
Progress counters and error details are written to stderr.
"""
import os
import sys
import traceback
from datetime import datetime

import pdfx

# In batch mode a file of exactly this many bytes (1 MiB) is taken to be
# truncated and is not handed to pdfx at all.
TRUNCATED_SIZE = 1048576


def run(file):
    """Extract the references from one PDF and print them to stdout.

    Prints one "<kind>\\t<link>" line for every entry of every key in the
    dict returned by pdfx, provided the 'scrape' or 'annot' entry is
    non-empty; otherwise prints "None".

    Exceptions are never propagated:
      * 'Unexpected EOF' is logged (timestamped) to stderr and "badpdf" is
        printed to stdout;
      * anything else gets a timestamped traceback on stderr and produces
        no stdout output at all.
    """
    try:
        pdf = pdfx.PDFx(file)
        links = pdf.get_references_as_dict()
        # The truthiness test on `links` comes first so that a None/empty
        # result never reaches .get().
        if links and (links.get('scrape', False) or
                      links.get('annot', False)):
            for kind, refs in links.items():
                for ref in refs:
                    print("%s\t%s" % (kind, ref))
        else:
            print("None")
    except Exception as e:
        if str(e) == 'Unexpected EOF':
            print("%s:\t%s" % (datetime.now().isoformat(), e), file=sys.stderr)
            print("badpdf")
        else:
            print("%s: " % (datetime.now().isoformat()), end='', file=sys.stderr)
            traceback.print_exc(file=sys.stderr)


if len(sys.argv) < 2:
    # Previously this case died with a bare IndexError traceback.
    sys.exit("usage: plinks.py FILE | -")

if sys.argv[1] == '-':
    # Batch mode: stdin supplies one file name per line.
    for i, line in enumerate(sys.stdin):
        print(i, file=sys.stderr)  # progress: 0-based index of this file
        f = line.rstrip()
        if os.path.getsize(f) == TRUNCATED_SIZE:  # truncated
            print("truncated", file=sys.stderr)
            print("truncated")
        else:
            run(f)
        # NOTE(review): indentation was lost in the extracted diff; the
        # unlink is assumed to sit at loop level, i.e. every listed file
        # (truncated or not) is deleted after handling -- confirm against
        # the repository copy.
        os.unlink(f)
else:
    run(sys.argv[1])