Mercurial > hg > cc > cirrus_work
diff bin/plinks.py @ 22:38bab758e469
accept filenames on stdin,
check for 1M => truncation,
always produce some output even if no links
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 29 Sep 2022 16:36:52 +0100 |
parents | 04464ee31d66 |
children |
line wrap: on
line diff
```diff
--- a/bin/plinks.py	Thu Sep 29 16:33:42 2022 +0100
+++ b/bin/plinks.py	Thu Sep 29 16:36:52 2022 +0100
@@ -1,25 +1,37 @@
 #!/usr/bin/env python3
-import sys,pdfx,traceback
+import sys,pdfx,traceback,os
 from datetime import datetime
 
 def run(file):
-  global pdf
-  pdf=pdfx.PDFx(file)
-  return pdf.get_references_as_dict()
-
-f=sys.argv[1]
+  try:
+    pdf=pdfx.PDFx(file)
+    links=pdf.get_references_as_dict()
+    if bool(links) and (links.get('scrape',False) or
+                        links.get('annot',False)):
+      for k in links.keys():
+        for l in links[k]:
+          print("%s\t%s"%(k,l))
+    else:
+      print("None")
+  except Exception as e:
+    if str(e)=='Unexpected EOF':
+      print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr)
+      print("badpdf")
+    else:
+      print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr)
+      traceback.print_exc(file=sys.stderr)
 
-try:
-  links=run(f)
-  if bool(links) and (links.get('scrape',False) or
-                      links.get('annot',False)):
-    for k in links.keys():
-      for l in links[k]:
-        print("%s\t%s"%(k,l))
-except Exception as e:
-  if str(e)=='Unexpected EOF':
-    print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr)
-  else:
-    print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr)
-    traceback.print_exc(file=sys.stderr)
-
+if sys.argv[1]=='-':
+  i=0
+  for l in sys.stdin:
+    print(i,file=sys.stderr)
+    i+=1
+    f=l.rstrip()
+    if os.path.getsize(f)==1048576: # truncated
+      print("truncated",file=sys.stderr)
+      print("truncated")
+    else:
+      run(f)
+    os.unlink(f)
+else:
+  run(sys.argv[1])
```