view bin/plinks.py @ 51:c0b4359dd26a

working better, gets confused by 3-part response
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Jul 2023 17:03:52 +0100
parents 38bab758e469
children
line wrap: on
line source

#!/usr/bin/env python3
import sys,pdfx,traceback,os
from datetime import datetime

def run(file):
  """Report the references pdfx extracts from the PDF at *file*.

  Output on stdout:
    * one "<key>\t<reference>" line for every entry of every list in the
      dictionary returned by get_references_as_dict(), provided that
      dictionary is non-empty and has a truthy 'scrape' or 'annot' entry;
    * the single line "None" when that condition does not hold;
    * the single line "badpdf" when processing raised an exception whose
      text is exactly 'Unexpected EOF'.

  Any other exception produces no stdout output at all: a timestamped
  traceback is written to stderr and the function returns normally.
  """
  try:
    refs = pdfx.PDFx(file).get_references_as_dict()
    # Only the 'scrape' and 'annot' entries decide whether there is
    # anything to report; once one is present every key is printed.
    if not (refs and (refs.get('scrape', False) or refs.get('annot', False))):
      print("None")
      return
    for kind, targets in refs.items():
      for target in targets:
        print("%s\t%s" % (kind, target))
  except Exception as err:
    stamp = datetime.now().isoformat()
    if str(err) != 'Unexpected EOF':
      # Unexpected failure: timestamp prefix, then the full traceback.
      print("%s: " % (stamp), end='', file=sys.stderr)
      traceback.print_exc(file=sys.stderr)
      return
    # Recognised failure: log it and give the caller a stdout marker.
    print("%s:\t%s" % (stamp, err), file=sys.stderr)
    print("badpdf")

# A listed file whose size is exactly this many bytes (1 MiB) is reported
# as "truncated" instead of being parsed (presumably the size cap of
# whatever produced the file -- TODO confirm).
TRUNCATED_SIZE = 1048576


def _log_oserror(e):
  """Write a timestamped one-line report of OSError *e* to stderr."""
  print("%s:\t%s" % (datetime.now().isoformat(), e), file=sys.stderr)


def main(argv=None):
  """Command-line driver.

  argv -- argument vector; defaults to sys.argv.

  With a single file name argument, run() is called on that file.

  With '-' as the argument, file names are read from stdin, one per line.
  For each line its 0-based index is written to stderr as a progress
  marker, then the file is either reported as "truncated" (on both stderr
  and stdout) when its size equals TRUNCATED_SIZE, or handed to run().
  NOTE: each listed file is DELETED after it has been handled.

  A line naming a file that cannot be stat'ed (including a blank line),
  or a file that cannot be deleted, is logged to stderr and the loop
  carries on with the next line; nothing is written to stdout for a file
  that cannot be stat'ed, matching what run() does for unexpected errors.

  Exits with status 2 and a usage message if no argument was given.
  """
  if argv is None:
    argv = sys.argv
  if len(argv) < 2:
    print("usage: %s FILE | -" % (argv[0] if argv else "plinks.py"),
          file=sys.stderr)
    sys.exit(2)
  if argv[1] != '-':
    run(argv[1])
    return
  for i, line in enumerate(sys.stdin):
    print(i, file=sys.stderr)
    f = line.rstrip()
    try:
      size = os.path.getsize(f)
    except OSError as e:
      # One bad entry must not abort the rest of the batch.
      _log_oserror(e)
      continue
    if size == TRUNCATED_SIZE:
      print("truncated", file=sys.stderr)
      print("truncated")
    else:
      run(f)
    try:
      os.unlink(f)
    except OSError as e:
      _log_oserror(e)


if __name__ == '__main__':
  main()