comparison pdfCrawl.py @ 0:fee51ab07d09

blanket publication of all existing python files in lib/python on maritain
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 09 Mar 2020 14:58:04 +0000
parents
children 0a3abe59e364
comparison
equal deleted inserted replaced
-1:000000000000 0:fee51ab07d09
1 import PyPDF2 as pyPdf, sys
2
# Print the URI targets found in the annotations of each page of the PDF
# named by the first command-line argument.
#
# Output: one "# <page index>" line per page (0-based), followed by one
# "U <uri>" line for every annotation on that page whose action dictionary
# has a URI entry.

# Dictionary keys looked up while walking the document.
key = '/Annots'  # looked up on each page object
uri = '/URI'     # looked up on an annotation's action dictionary
ank = '/A'       # looked up on each annotation object

# Keep the file open for as long as the reader is in use; the 'with'
# block closes it afterwards, even if parsing raises part-way through.
with open(sys.argv[1], 'rb') as f:
    pdf = pyPdf.PdfFileReader(f)
    pgs = pdf.getNumPages()

    for pg in range(pgs):
        # Single-argument print of a formatted string gives the same
        # output under both Python 2 and Python 3.
        print('# %s' % pg)
        o = pdf.getPage(pg).getObject()
        if key not in o:
            # Page has no annotations entry, so nothing to report.
            continue
        for a in o[key]:
            u = a.getObject()
            # Skip annotations without an action dictionary instead of
            # failing with KeyError on the first one encountered.
            if ank in u and uri in u[ank]:
                print('U %s' % u[ank][uri])