comparison bin/doPlinks.sh @ 9:7a93e190c74d

logging tweaks, preparing for timeout on problem pdfs
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 25 Feb 2020 10:34:41 +0000
parents 3b56c2c9d0ee
children b0d9fe66ce8a
comparison
Legend: equal | deleted | inserted | replaced
Left column: 8:3b56c2c9d0ee (parent) | Right column: 9:7a93e190c74d (this changeset)
9 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2 9 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2
10 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\ 10 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\
11 plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/stopJob ; exit 1 ; } 11 plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/stopJob ; exit 1 ; }
12 echo $(date) $hn tarring $(ls badpdfs_*|wc -l)/$(ls links_*_*|wc -l) results from job $jn for $tfn in $(pwd) 1>&2 12 echo $(date) $hn tarring $(ls badpdfs_*|wc -l)/$(ls links_*_*|wc -l) results from job $jn for $tfn in $(pwd) 1>&2
13 tar -cf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar badpdfs_${tfn} links_${tfn}_* 13 tar -cf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar badpdfs_${tfn} links_${tfn}_*
14 echo $(date) $(pwd) rm $(ls -lt badpdfs_*) 14 echo $(date) $(pwd) rm $(ls -lt badpdfs_*) 1>&2
15 echo . . . $(ls -lt links_*_* | tee >(tail -1 1>&2) | head -1) 1>&2
16 rm * 15 rm *
17 cd .. 16 cd ..
18 echo $(date) $(pwd) rmdir ${tfn} 1>&2 17 echo $(date) $(pwd) rmdir ${tfn} 1>&2
19 rmdir ${tfn} 18 rmdir ${tfn}
20 echo $(date) $hn finished job ${jn} for ${tfn} 19 echo $(date) $hn finished job ${jn} for ${tfn}