Mercurial > hg > python
diff pdfCrawl.py @ 1:0a3abe59e364
updated from more recent versions on origen
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 09 Mar 2020 16:45:20 +0000 |
parents | fee51ab07d09 |
children | 2d7c91f89f6b |
line wrap: on
line diff
```diff
--- a/pdfCrawl.py	Mon Mar 09 14:58:04 2020 +0000
+++ b/pdfCrawl.py	Mon Mar 09 16:45:20 2020 +0000
@@ -1,5 +1,11 @@
 import PyPDF2 as pyPdf, sys
 
+if sys.argv[1]=='-v':
+  verbose=True
+  sys.argv.pop(1)
+else:
+  verbose=False
+
 f = open(sys.argv[1],'rb')
 
 pdf = pyPdf.PdfFileReader(f)
@@ -20,5 +26,7 @@
 #print >>sys.stderr,key,ann
 for a in ann:
   u = a.getObject()
-  if u[ank].has_key(uri):
-    print "U",u[ank][uri]
+  if ank in u and uri in u[ank]:
+    if verbose:
+      print u[ank]
+    print u[ank][uri]
```