Mercurial > hg > python
annotate pdfComments.py @ 17:6c9e371b0325
doesn't break, what's there is good, but stuck part-way in 10a
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 25 Mar 2020 19:21:38 +0000 |
parents | e07789816ca5 |
children |
rev | line source |
---|---|
2
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
1 import PyPDF2 as pyPdf, sys |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
2 |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
3 if sys.argv[1]=='-v': |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
4 verbose=True |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
5 sys.argv.pop(1) |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
6 else: |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
7 verbose=False |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
8 |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
9 f = open(sys.argv[1],'rb') |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
10 |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
11 pdf = pyPdf.PdfFileReader(f) |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
12 pgs = pdf.getNumPages() |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
13 key = '/Annots' |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
14 uri = '/URI' |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
15 ank = '/A' |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
16 |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
17 #print pdf.getNamedDestinations() |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
18 |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
19 for pg in range(pgs): |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
20 print '#',pg |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
21 p = pdf.getPage(pg) |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
22 o = p.getObject() |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
23 #print o.keys() |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
24 if o.has_key(key): |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
25 ann = o[key] |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
26 #print key,ann |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
27 for a in ann: |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
28 u = a.getObject() |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
29 if '/Contents' in u: |
e07789816ca5
adding more python files from lib/python on origen
Henry Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
30 print "%s: %s"%(u['/Subtype'],u['/Contents']) |