# HG changeset patch # User Henry S. Thompson # Date 1664465812 -3600 # Node ID 38bab758e4698ff4f2bf81730e1d932c1aa73b32 # Parent cbac7dfe2f24bafd8db5e0d728ca96a3f25e75a1 accept filenames on stdin, check for 1M => truncation, always produce some output even if no links diff -r cbac7dfe2f24 -r 38bab758e469 bin/plinks.py --- a/bin/plinks.py Thu Sep 29 16:33:42 2022 +0100 +++ b/bin/plinks.py Thu Sep 29 16:36:52 2022 +0100 @@ -1,25 +1,37 @@ #!/usr/bin/env python3 -import sys,pdfx,traceback +import sys,pdfx,traceback,os from datetime import datetime def run(file): - global pdf - pdf=pdfx.PDFx(file) - return pdf.get_references_as_dict() - -f=sys.argv[1] + try: + pdf=pdfx.PDFx(file) + links=pdf.get_references_as_dict() + if bool(links) and (links.get('scrape',False) or + links.get('annot',False)): + for k in links.keys(): + for l in links[k]: + print("%s\t%s"%(k,l)) + else: + print("None") + except Exception as e: + if str(e)=='Unexpected EOF': + print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr) + print("badpdf") + else: + print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr) + traceback.print_exc(file=sys.stderr) -try: - links=run(f) - if bool(links) and (links.get('scrape',False) or - links.get('annot',False)): - for k in links.keys(): - for l in links[k]: - print("%s\t%s"%(k,l)) -except Exception as e: - if str(e)=='Unexpected EOF': - print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr) - else: - print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr) - traceback.print_exc(file=sys.stderr) - +if sys.argv[1]=='-': + i=0 + for l in sys.stdin: + print(i,file=sys.stderr) + i+=1 + f=l.rstrip() + if os.path.getsize(f)==1048576: # truncated + print("truncated",file=sys.stderr) + print("truncated") + else: + run(f) + os.unlink(f) +else: + run(sys.argv[1])