Mercurial > hg > python
comparison pdfCrawl.py @ 0:fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 09 Mar 2020 14:58:04 +0000 |
parents | |
children | 0a3abe59e364 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fee51ab07d09 |
---|---|
"""List the URI link targets found in a PDF, page by page.

Usage: pdfCrawl.py FILE.pdf

For every page a line ``# <page index>`` is printed (index starts at 0),
followed by one ``U <uri>`` line for each annotation on that page whose
``/A`` (action) dictionary carries a ``/URI`` entry.
"""
import sys

import PyPDF2 as pyPdf

key = '/Annots'   # page entry holding the page's annotations
uri = '/URI'      # entry inside an action dictionary holding the target
ank = '/A'        # annotation entry holding its action dictionary

if len(sys.argv) < 2:
    sys.exit("usage: pdfCrawl.py FILE.pdf")

# The reader pulls objects from the file on demand, so the file has to
# stay open for the whole crawl; ``with`` closes it afterwards.
with open(sys.argv[1], 'rb') as f:
    pdf = pyPdf.PdfFileReader(f)
    pgs = pdf.getNumPages()

    for pg in range(pgs):
        # Single-argument print() yields the same text on Python 2 and 3.
        print('# %d' % pg)
        o = pdf.getPage(pg).getObject()
        if key not in o:
            continue
        for a in o[key]:
            u = a.getObject()
            # Not every annotation has an action (e.g. notes, highlights,
            # links using /Dest); skip those rather than raise KeyError.
            if ank not in u:
                continue
            action = u[ank]
            if uri in action:
                print("U %s" % (action[uri],))