comparison bin/plinks.py @ 16:04464ee31d66

toward link extractions from pdf
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 07 Aug 2022 13:56:49 +0100
parents
children 38bab758e469
comparison
equal deleted inserted replaced
15:a9763cd18949 16:04464ee31d66
1 #!/usr/bin/env python3
2 import sys,pdfx,traceback
3 from datetime import datetime
4
def run(file):
    """Open *file* with pdfx and return its references as a dict.

    The PDFx object is also bound to the module-level name ``pdf``
    (presumably so it can be inspected after the call -- confirm).
    The caller treats the result as a mapping from a reference
    category to an iterable of references.
    """
    global pdf
    pdf = pdfx.PDFx(file)
    references = pdf.get_references_as_dict()
    return references
9
# Script body: extract the references from the PDF named on the command
# line and print them to stdout as "category<TAB>reference" lines.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    # sys.exit() with a string writes it to stderr and exits with status 1.
    sys.exit("usage: %s FILE" % sys.argv[0])

f = sys.argv[1]

try:
    links = run(f)
    # Output is produced only when at least one of the 'scrape' or
    # 'annot' categories is non-empty; in that case every category
    # in the result is printed, not just those two.
    if links and (links.get('scrape') or links.get('annot')):
        for kind, refs in links.items():
            for ref in refs:
                print("%s\t%s" % (kind, ref))
except Exception as e:
    # Any failure is reported on stderr with an ISO timestamp and is not
    # re-raised, so the script still terminates normally.
    stamp = datetime.now().isoformat()
    if str(e) == 'Unexpected EOF':
        # This particular message gets a one-line report only.
        print("%s:\t%s" % (stamp, e), file=sys.stderr)
    else:
        # Everything else gets the timestamp followed by a full traceback.
        print("%s: " % stamp, end='', file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
25