Mercurial > hg > python
changeset 1:0a3abe59e364
updated from more recent versions on origen
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 09 Mar 2020 16:45:20 +0000 |
parents | fee51ab07d09 |
children | e07789816ca5 |
files | bobi.py modify.py pdfCrawl.py |
diffstat | 3 files changed, 27 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bobi.py Mon Mar 09 14:58:04 2020 +0000
+++ b/bobi.py Mon Mar 09 16:45:20 2020 +0000
@@ -1,9 +1,9 @@
 #!/bin/python
 from sys import stdin
-from urllib2 import Request,urlopen, HTTPError
+from urllib2 import Request,urlopen, HTTPError, URLError
 
 l=''
-year='2015'
+year='2016'
 uuns={}
 
 def cc(names):
@@ -52,19 +52,25 @@
 req='<app year="%s" uun="%s" type="PHD %s" surname="%s" forenames="%s" cat="%s" stat="%s" decision="%s" pgm="PhD ILCC" entry="%s" email="%s" country="%s" nationality="%s"/>'%(year,uun,ptype,surname,forenames,cat,stat,dec,entry,email,country,nat)
 #print req.encode('iso-8859-1')
 #continue
-r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq",
+r=Request("http://troutbeck.inf.ed.ac.uk:8080/exist/apps/phd/new-app-maybe.xq",
           req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
 try:
   res=urlopen(r)
-except HTTPError as err:
-  print "Error:",err.read()
-  print req
-  exit(1)
+  host="troutbeck.inf.ed.ac.uk"
+except URLError as err1:
+  r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq",
+            req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
+  try:
+    res=urlopen(r)
+    host="localhost"
+  except URLError as err:
+    print "Failed, no way to database server:",err1.read(),err.read()
+    exit(1)
 res=res.read()
 print ptype,res
 if (not oldf) and res.find("<div>We already")==0:
   req='<update year="%s" uun="%s" nationality="%s"/>'%(year,uun,nat)
-  r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq",
+  r=Request("http://%s:8080/exist/apps/phd/updateApp.xq"%host,
             req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
   try:
     res=urlopen(r)
```
```diff
--- a/modify.py Mon Mar 09 14:58:04 2020 +0000
+++ b/modify.py Mon Mar 09 16:45:20 2020 +0000
@@ -1,10 +1,10 @@
 #!/bin/python
-# Usage: modify.py uun fields...
+# Usage: modify.py fields...
 from sys import stdin,argv
 from urllib2 import Request,urlopen, HTTPError
 
 l=''
-year='2014'
+year='2016'
 uuns={}
 
 def cc(names):
@@ -25,7 +25,7 @@
 attrs=" ".join(map(lambda (n,v):'%s="%s"'%(n,v),zip(eargs,vals)))
 req='<update year="%s" %s/>'%(year,attrs)
 print req
-r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq",
+r=Request("http://troutbeck:8080/exist/apps/phd/updateApp.xq",
           req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
 try:
   res=urlopen(r)
```
```diff
--- a/pdfCrawl.py Mon Mar 09 14:58:04 2020 +0000
+++ b/pdfCrawl.py Mon Mar 09 16:45:20 2020 +0000
@@ -1,5 +1,11 @@
 import PyPDF2 as pyPdf, sys
 
+if sys.argv[1]=='-v':
+  verbose=True
+  sys.argv.pop(1)
+else:
+  verbose=False
+
 f = open(sys.argv[1],'rb')
 
 pdf = pyPdf.PdfFileReader(f)
@@ -20,5 +26,7 @@
 #print >>sys.stderr,key,ann
 for a in ann:
   u = a.getObject()
-  if u[ank].has_key(uri):
-    print "U",u[ank][uri]
+  if ank in u and uri in u[ank]:
+    if verbose:
+      print u[ank]
+    print u[ank][uri]
```