Mercurial > hg > cc > cirrus_home
changeset 10:a33db8e3f51c
bigger run, longer limit
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 25 Feb 2020 14:56:36 +0000 |
parents | 7a93e190c74d |
children | b0d9fe66ce8a |
files | bin/plinks.py bin/plinks.sh |
diffstat | 2 files changed, 8 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/plinks.py Tue Feb 25 10:34:41 2020 +0000
+++ b/bin/plinks.py Tue Feb 25 14:56:36 2020 +0000
@@ -5,8 +5,8 @@
 
 def run(file):
   global pdf
-  pdf=pdfx.PDFx(file,limit=30)
-  return pdf.get_references_as_dict()
+  pdf=pdfx.PDFx(file,limit=60)
+  return (pdf.get_references_as_dict(),pdf.limited)
 
 tarnum=sys.argv[1]
 print(tarnum, sys.argv, os.getcwd(),file=sys.stderr)
@@ -15,7 +15,10 @@
 for l in sys.stdin:
   (fno,f)=l.split()
   try:
-    links=run(f)
+    (links,limited)=run(f)
+    if limited:
+      print("%s\t%s\tProcessing limited after timeout"%(
+        datetime.now().isoformat(),fno),file=bf)
     if bool(links) and (links.get('scrape',False) or
                         links.get('annot',False)):
       gf+=1
--- a/bin/plinks.sh Tue Feb 25 10:34:41 2020 +0000
+++ b/bin/plinks.sh Tue Feb 25 14:56:36 2020 +0000
@@ -3,6 +3,6 @@
 echo $(date) $(hostname)
 h=$(hostname)
 hn=${h##*n}
-if [ $hn -eq 0 ]; then echo {005..008}; else echo {009..012}; fi |\
-tr ' ' '\012' |parallel --will-cite -n 1 doPlinks.sh ${hn} '{#}' '{}'
+if [ $hn -eq 0 ]; then echo {013..062}; else echo {063..112}; fi |\
+tr ' ' '\012' |parallel --will-cite -j 30 -N 1 doPlinks.sh ${hn} '{#}' '{}'
 echo $(date) $(hostname) $?