Mercurial > hg > cc > cirrus_work
comparison bin/plinks.py @ 22:38bab758e469
accept filenames on stdin (when the argument is '-'),
treat a file of exactly 1 MiB (1048576 bytes) as truncated,
always produce some output, even if no links are found
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 29 Sep 2022 16:36:52 +0100 |
parents | 04464ee31d66 |
children |
comparison
equal
deleted
inserted
replaced
21:cbac7dfe2f24 | 22:38bab758e469 |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 import sys,pdfx,traceback | 2 import sys,pdfx,traceback,os |
3 from datetime import datetime | 3 from datetime import datetime |
4 | 4 |
5 def run(file): | 5 def run(file): |
6 global pdf | 6 try: |
7 pdf=pdfx.PDFx(file) | 7 pdf=pdfx.PDFx(file) |
8 return pdf.get_references_as_dict() | 8 links=pdf.get_references_as_dict() |
9 if bool(links) and (links.get('scrape',False) or | |
10 links.get('annot',False)): | |
11 for k in links.keys(): | |
12 for l in links[k]: | |
13 print("%s\t%s"%(k,l)) | |
14 else: | |
15 print("None") | |
16 except Exception as e: | |
17 if str(e)=='Unexpected EOF': | |
18 print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr) | |
19 print("badpdf") | |
20 else: | |
21 print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr) | |
22 traceback.print_exc(file=sys.stderr) | |
9 | 23 |
10 f=sys.argv[1] | 24 if sys.argv[1]=='-': |
11 | 25 i=0 |
12 try: | 26 for l in sys.stdin: |
13 links=run(f) | 27 print(i,file=sys.stderr) |
14 if bool(links) and (links.get('scrape',False) or | 28 i+=1 |
15 links.get('annot',False)): | 29 f=l.rstrip() |
16 for k in links.keys(): | 30 if os.path.getsize(f)==1048576: # truncated |
17 for l in links[k]: | 31 print("truncated",file=sys.stderr) |
18 print("%s\t%s"%(k,l)) | 32 print("truncated") |
19 except Exception as e: | 33 else: |
20 if str(e)=='Unexpected EOF': | 34 run(f) |
21 print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr) | 35 os.unlink(f) |
22 else: | 36 else: |
23 print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr) | 37 run(sys.argv[1]) |
24 traceback.print_exc(file=sys.stderr) | |
25 |