Mercurial > hg > cc > cirrus_work
comparison lib/python/plinks.py @ 120:1d1bd22124c0
moved from bin
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 28 Sep 2023 08:46:01 +0100 |
parents | bin/plinks.py@38bab758e469 |
children |
comparison
equal
deleted
inserted
replaced
119:1d12b51c4d59 | 120:1d1bd22124c0 |
---|---|
1 #!/usr/bin/env python3 | |
2 import sys,pdfx,traceback,os | |
3 from datetime import datetime | |
4 | |
def run(file):
    """Report the links found in one PDF on stdout.

    The file is opened with ``pdfx.PDFx`` and its references are fetched
    with ``get_references_as_dict()``.  If that result is non-empty and has
    a truthy 'scrape' or 'annot' entry, every reference under every key is
    printed as ``key<TAB>reference``, one per line.  Otherwise the single
    line ``None`` is printed.

    No exception escapes: one whose message is exactly 'Unexpected EOF' is
    logged to stderr with a timestamp and reported on stdout as ``badpdf``;
    any other exception only gets a timestamped traceback on stderr.
    """
    try:
        refs = pdfx.PDFx(file).get_references_as_dict()
        has_links = bool(refs) and (refs.get('scrape', False) or
                                    refs.get('annot', False))
        if not has_links:
            print("None")
            return
        for kind, targets in refs.items():
            for target in targets:
                print("%s\t%s" % (kind, target))
    except Exception as err:
        # The EOF case is recognised purely by its message text.
        premature_eof = str(err) == 'Unexpected EOF'
        stamp = datetime.now().isoformat()
        if premature_eof:
            print("%s:\t%s" % (stamp, err), file=sys.stderr)
            print("badpdf")
        else:
            # Timestamp prefix, then the traceback on the same stderr line.
            print("%s: " % (stamp), end='', file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
23 | |
# Command-line driver.
#
#   <script> FILE   report the links in the single PDF FILE
#   <script> -      read file names from stdin, one per line; for each one
#                   print a progress counter to stderr, report its links
#                   (or "truncated"), then delete the file

# Size in bytes (exactly 1 MiB) at which an input file is reported as
# truncated instead of being parsed.
# NOTE(review): presumably the size cap applied when the files were
# fetched -- confirm against whatever produces them.
TRUNCATED_SIZE = 1048576

if len(sys.argv) < 2:
    # A missing argument used to surface as a bare IndexError traceback.
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit("usage: %s (FILE | -)" % sys.argv[0])

if sys.argv[1] == '-':
    for i, line in enumerate(sys.stdin):
        print(i, file=sys.stderr)  # progress: index of the current input line
        f = line.rstrip()
        if os.path.getsize(f) == TRUNCATED_SIZE:  # truncated
            print("truncated", file=sys.stderr)
            print("truncated")
        else:
            run(f)
        # NOTE(review): indentation was lost in the listing this was taken
        # from; the unlink is assumed to apply to every input file, whether
        # truncated or processed -- confirm.
        os.unlink(f)
else:
    run(sys.argv[1])