Mercurial > hg > cc > cirrus_work
comparison bin/plinks.py @ 16:04464ee31d66
toward link extractions from pdf
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 07 Aug 2022 13:56:49 +0100 |
parents | |
children | 38bab758e469 |
comparison
equal
deleted
inserted
replaced
15:a9763cd18949 | 16:04464ee31d66 |
---|---|
1 #!/usr/bin/env python3 | |
2 import sys,pdfx,traceback | |
3 from datetime import datetime | |
4 | |
def run(file):
    """Open *file* with pdfx and return its references as a dict.

    The PDFx object is also bound to the module-level name ``pdf``.
    NOTE(review): presumably kept global so it can be inspected after
    the call -- confirm before removing.
    """
    global pdf
    pdf = pdfx.PDFx(file)
    refs = pdf.get_references_as_dict()
    return refs
9 | |
# The PDF to process is given as the first command-line argument.
f = sys.argv[1]

try:
    links = run(f)
    # Print nothing unless the result is non-empty and has a truthy
    # 'scrape' or 'annot' entry; when it does, every key is printed,
    # one tab-separated "key<TAB>value" line per value.
    if links and (links.get('scrape') or links.get('annot')):
        for kind, refs in links.items():
            for ref in refs:
                print("%s\t%s" % (kind, ref))
except Exception as e:
    # Failures are reported on stderr with an ISO timestamp rather than
    # propagated, so the script itself does not raise.
    stamp = datetime.now().isoformat()
    if str(e) != 'Unexpected EOF':
        # Anything other than the known EOF message gets a full traceback
        # on the same line as the timestamp prefix.
        print("%s: " % stamp, end='', file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
    else:
        # Known failure: a single log line, no traceback.
        print("%s:\t%s" % (stamp, e), file=sys.stderr)
25 |