changeset 1:0a3abe59e364

updated from more recent versions on origen
author Henry Thompson <ht@markup.co.uk>
date Mon, 09 Mar 2020 16:45:20 +0000
parents fee51ab07d09
children e07789816ca5
files bobi.py modify.py pdfCrawl.py
diffstat 3 files changed, 27 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/bobi.py	Mon Mar 09 14:58:04 2020 +0000
+++ b/bobi.py	Mon Mar 09 16:45:20 2020 +0000
@@ -1,9 +1,9 @@
 #!/bin/python
 from sys import stdin
-from urllib2 import Request,urlopen, HTTPError
+from urllib2 import Request,urlopen, HTTPError, URLError
 
 l=''
-year='2015'
+year='2016'
 uuns={}
 
 def cc(names):
@@ -52,19 +52,25 @@
   req='<app year="%s" uun="%s" type="PHD %s" surname="%s" forenames="%s" cat="%s" stat="%s" decision="%s" pgm="PhD ILCC" entry="%s" email="%s" country="%s" nationality="%s"/>'%(year,uun,ptype,surname,forenames,cat,stat,dec,entry,email,country,nat)
   #print req.encode('iso-8859-1')
   #continue
-  r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq",
+  r=Request("http://troutbeck.inf.ed.ac.uk:8080/exist/apps/phd/new-app-maybe.xq",
             req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
   try:
     res=urlopen(r)
-  except HTTPError as err:
-    print "Error:",err.read()
-    print req
-    exit(1)
+    host="troutbeck.inf.ed.ac.uk"
+  except URLError as err1:
+    r=Request("http://localhost:8080/exist/apps/phd/new-app-maybe.xq",
+              req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
+    try:
+      res=urlopen(r)
+      host="localhost"
+    except URLError as err:
+      print "Failed, no way to database server:",err1.read(),err.read()
+      exit(1)
   res=res.read()
   print ptype,res
   if (not oldf) and res.find("<div>We already")==0:
     req='<update year="%s" uun="%s" nationality="%s"/>'%(year,uun,nat)
-    r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq",
+    r=Request("http://%s:8080/exist/apps/phd/updateApp.xq"%host,
               req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
     try:
       res=urlopen(r)
--- a/modify.py	Mon Mar 09 14:58:04 2020 +0000
+++ b/modify.py	Mon Mar 09 16:45:20 2020 +0000
@@ -1,10 +1,10 @@
 #!/bin/python
-# Usage: modify.py uun fields...
+# Usage: modify.py fields...
 from sys import stdin,argv
 from urllib2 import Request,urlopen, HTTPError
 
 l=''
-year='2014'
+year='2016'
 uuns={}
 
 def cc(names):
@@ -25,7 +25,7 @@
   attrs=" ".join(map(lambda (n,v):'%s="%s"'%(n,v),zip(eargs,vals)))
   req='<update year="%s" %s/>'%(year,attrs)
   print req
-  r=Request("http://localhost:8080/exist/apps/phd/updateApp.xq",
+  r=Request("http://troutbeck:8080/exist/apps/phd/updateApp.xq",
             req.encode('utf-8'),headers={'Content-Type':'application/xml;charset=UTF-8'})
   try:
     res=urlopen(r)
--- a/pdfCrawl.py	Mon Mar 09 14:58:04 2020 +0000
+++ b/pdfCrawl.py	Mon Mar 09 16:45:20 2020 +0000
@@ -1,5 +1,11 @@
 import PyPDF2 as pyPdf, sys
 
+if sys.argv[1]=='-v':
+    verbose=True
+    sys.argv.pop(1)
+else:
+    verbose=False
+
 f = open(sys.argv[1],'rb')
 
 pdf = pyPdf.PdfFileReader(f)
@@ -20,5 +26,7 @@
         #print >>sys.stderr,key,ann
         for a in ann:
             u = a.getObject()
-            if u[ank].has_key(uri):
-                print "U",u[ank][uri]
+            if ank in u and uri in u[ank]:
+                if verbose:
+                    print u[ank]
+                print u[ank][uri]