changeset 22:38bab758e469

Accept filenames on stdin (when the argument is '-'), treat a file of exactly 1 MiB (1048576 bytes) as truncated, and always produce some output even if no links are found
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 29 Sep 2022 16:36:52 +0100
parents cbac7dfe2f24
children e82a82ea3704
files bin/plinks.py
diffstat 1 files changed, 32 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/bin/plinks.py	Thu Sep 29 16:33:42 2022 +0100
+++ b/bin/plinks.py	Thu Sep 29 16:36:52 2022 +0100
@@ -1,25 +1,37 @@
 #!/usr/bin/env python3
-import sys,pdfx,traceback
+import sys,pdfx,traceback,os
 from datetime import datetime
 
 def run(file):
-  global pdf
-  pdf=pdfx.PDFx(file)
-  return pdf.get_references_as_dict()
-
-f=sys.argv[1]
+  try:
+    pdf=pdfx.PDFx(file)
+    links=pdf.get_references_as_dict()
+    if bool(links) and (links.get('scrape',False) or
+                        links.get('annot',False)):
+      for k in links.keys():
+        for l in links[k]:
+          print("%s\t%s"%(k,l))
+    else:
+      print("None")
+  except Exception as e:
+    if str(e)=='Unexpected EOF':
+      print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr)
+      print("badpdf")
+    else:
+      print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr)
+      traceback.print_exc(file=sys.stderr)
 
-try:
-  links=run(f)
-  if bool(links) and (links.get('scrape',False) or
-                      links.get('annot',False)):
-    for k in links.keys():
-      for l in links[k]:
-        print("%s\t%s"%(k,l))
-except Exception as e:
-  if str(e)=='Unexpected EOF':
-    print("%s:\t%s"%(datetime.now().isoformat(),e),file=sys.stderr)
-  else:
-    print("%s: "%(datetime.now().isoformat()),end='',file=sys.stderr)
-    traceback.print_exc(file=sys.stderr)
-
+if sys.argv[1]=='-':
+  i=0
+  for l in sys.stdin:
+    print(i,file=sys.stderr)
+    i+=1
+    f=l.rstrip()
+    if os.path.getsize(f)==1048576: # truncated
+      print("truncated",file=sys.stderr)
+      print("truncated")
+    else:
+      run(f)
+    os.unlink(f)
+else:
+  run(sys.argv[1])