# HG changeset patch # User Henry Thompson # Date 1583772320 0 # Node ID 0a3abe59e364c086270aa57ce7326815ecd5a6f0 # Parent fee51ab07d0999d491bcea32a21a00befc38100f updated from more recent versions on origen diff -r fee51ab07d09 -r 0a3abe59e364 bobi.py --- a/bobi.py Mon Mar 09 14:58:04 2020 +0000 +++ b/bobi.py Mon Mar 09 16:45:20 2020 +0000 @@ -1,9 +1,9 @@ #!/bin/python from sys import stdin -from urllib2 import Request,urlopen, HTTPError +from urllib2 import Request,urlopen, HTTPError, URLError l='' -year='2015' +year='2016' uuns={} def cc(names): @@ -52,19 +52,25 @@ req=''%(year,uun,ptype,surname,forenames,cat,stat,dec,entry,email,country,nat) #print req.encode('iso-8859-1') #continue - r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq", + r=Request("http://troutbeck.inf.ed.ac.uk:8080/exist/apps/phd/new-app-maybe.xq", req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'}) try: res=urlopen(r) - except HTTPError as err: - print "Error:",err.read() - print req - exit(1) + host="troutbeck.inf.ed.ac.uk" + except URLError as err1: + r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq", + req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'}) + try: + res=urlopen(r) + host="localhost" + except URLError as err: + print "Failed, no way to database server:",err1.read(),err.read() + exit(1) res=res.read() print ptype,res if (not oldf) and res.find("
We already")==0: req=''%(year,uun,nat) - r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq", + r=Request("http://%s:8080/exist/apps/phd/updateApp.xq"%host, req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'}) try: res=urlopen(r) diff -r fee51ab07d09 -r 0a3abe59e364 modify.py --- a/modify.py Mon Mar 09 14:58:04 2020 +0000 +++ b/modify.py Mon Mar 09 16:45:20 2020 +0000 @@ -1,10 +1,10 @@ #!/bin/python -# Usage: modify.py uun fields... +# Usage: modify.py fields... from sys import stdin,argv from urllib2 import Request,urlopen, HTTPError l='' -year='2014' +year='2016' uuns={} def cc(names): @@ -25,7 +25,7 @@ attrs=" ".join(map(lambda (n,v):'%s="%s"'%(n,v),zip(eargs,vals))) req=''%(year,attrs) print req - r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq", + r=Request("http://troutbeck:8080/exist/apps/phd/updateApp.xq", req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'}) try: res=urlopen(r) diff -r fee51ab07d09 -r 0a3abe59e364 pdfCrawl.py --- a/pdfCrawl.py Mon Mar 09 14:58:04 2020 +0000 +++ b/pdfCrawl.py Mon Mar 09 16:45:20 2020 +0000 @@ -1,5 +1,11 @@ import PyPDF2 as pyPdf, sys +if sys.argv[1]=='-v': + verbose=True + sys.argv.pop(1) +else: + verbose=False + f = open(sys.argv[1],'rb') pdf = pyPdf.PdfFileReader(f) @@ -20,5 +26,7 @@ #print >>sys.stderr,key,ann for a in ann: u = a.getObject() - if u[ank].has_key(uri): - print "U",u[ank][uri] + if ank in u and uri in u[ank]: + if verbose: + print u[ank] + print u[ank][uri]