changeset 1:a4b0359456bc

switch to file loop inside python, assume file index integer in pipe as well as filename, check /dev/shm/stopJob
author Henry Thompson <ht@markup.co.uk>
date Tue, 18 Feb 2020 21:33:35 +0000
parents 65a56c0d1c1f
children 83ed7c5846b2
files bin/doPlinks.sh bin/plinks.py
diffstat 2 files changed, 32 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/bin/doPlinks.sh	Tue Feb 18 13:15:05 2020 +0000
+++ b/bin/doPlinks.sh	Tue Feb 18 21:33:35 2020 +0000
@@ -1,15 +1,14 @@
 #!/usr/bin/bash
-me=$1
-mine=0
-mkdir /dev/shm/x
-while read f
-do
-    if plinks.py $f > /dev/shm/x/links_${me}_${mine} 2>/dev/null
-    then
-	((mine+=1))
-    else
-	echo $f >> /dev/shm/x/badpdfs_$me
-	rm -f /dev/shm/x/links_${me}_${mine}
-    fi
-done
+mkdir -p /dev/shm/x  # -p: succeed even if the staging directory already exists
+plinks.py "$1"  # quoted so the worker id reaches plinks.py as exactly one argument
+# while read f
+# do
+#     if plinks.py $f > /dev/shm/x/links_${me}_${mine} 2>/dev/null
+#     then
+# 	((mine+=1))
+#     else
+# 	echo $f >> /dev/shm/x/badpdfs_$me
+# 	rm -f /dev/shm/x/links_${me}_${mine}
+#     fi
+# done
 rsync -a /dev/shm/x/ links
--- a/bin/plinks.py	Tue Feb 18 13:15:05 2020 +0000
+++ b/bin/plinks.py	Tue Feb 18 21:33:35 2020 +0000
@@ -1,24 +1,24 @@
 #!/lustre/sw/miniconda3/bin/python3
 import sys,pdfx,traceback
-def run():
+from os import path
+
+def run(file):  # file: path of one PDF, handed unchanged to pdfx.PDFx
   global pdf
-  try:
-    pdf=pdfx.PDFx(sys.argv[1])
-    links=pdf.get_references_as_dict()
-  except:
-    traceback.print_exc()
-    print("\nFailed: %s"%sys.argv[1],file=sys.stderr)
-    exit(1)
-  for k in links.keys():
-    for l in links[k]:
-         print("%s\t%s"%(k,l))
+  pdf=pdfx.PDFx(file)  # bound to the module-level name 'pdf' via the 'global' statement above
+  return pdf.get_references_as_dict()  # the caller iterates this as key -> iterable of links
 
-if sys.argv[1]=='-t':
-  import timeit
-  sys.argv.pop(1)
-  n=sys.argv[1]
-  sys.argv.pop(1)
-  print(timeit.timeit("run()",number=int(n),
-                      setup="from __main__ import run"),file=sys.stderr)
-else:
-  run()
+me=sys.argv[1]  # worker id: names this worker's bad-PDF log and appears in the stop message
+with open('/dev/shm/x/badpdfs_%s'%me,'w') as bf:
+  for line in sys.stdin:  # each input line: <file index> <whitespace> <PDF path>
+    (fno,f)=line.rstrip().split(None,1)  # split once so a path containing spaces stays whole
+    try:
+      links=run(f)  # pass the parsed path; the name 'file' is not defined at module level
+      with open('/dev/shm/x/links_%s'%fno,'w') as of:
+        for k in links.keys():
+          for link in links[k]:  # distinct name so the stdin loop variable is not shadowed
+            print("%s\t%s\t%s"%(fno,k,link),file=of)  # three fields: index, key, link
+    except Exception as e:
+      print("%s\t%s\t%s"%(fno,f,e),file=bf)  # three fields: index, path, error text
+    if (path.exists('/dev/shm/stopJob')):  # checked after every file so a job can be stopped early
+      print("Quiting early: %s %s"%(me,fno),file=sys.stderr)
+      sys.exit(1)  # sys.exit rather than the site-provided exit(), which is not always present