Mercurial > hg > cc > cirrus_home
view bin/doPlinks.sh @ 8:3b56c2c9d0ee
longer run, terser logging
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 24 Feb 2020 12:16:10 +0000 |
parents | 25ca3505b4d7 |
children | 7a93e190c74d |
line wrap: on
line source
#!/usr/bin/bash
#
# doPlinks.sh -- process the PDFs of one tar file in a scratch directory.
#
# Usage: doPlinks.sh HOST JOB TARNAME
#   HOST     tag used in the scratch path (/dev/shm/x<HOST>/<TARNAME>) and in
#            log lines (presumably the name of the host running the job)
#   JOB      job identifier; only used in log lines
#   TARNAME  name, without ".tar", of the input archive under PDF_ROOT; also
#            passed to plinks.py and used to name the result archive
#
# Steps:
#   1. extract *.pdf from PDF_ROOT/<TARNAME>.tar into the scratch directory
#   2. feed a sorted, numbered list of the PDF names to plinks.py on stdin
#   3. tar badpdfs_<TARNAME> and links_<TARNAME>_* into PDF_ROOT/links/
#   4. remove the scratch files and the scratch directory
#
# Output: progress lines go to stderr; the "aborted"/"finished" status lines
#         (and the first "rm" line) go to stdout.
# Exit:   0 on completion; 1 if plinks.py fails, the scratch directory cannot
#         be entered, or the result archive cannot be written; 2 on bad usage.

readonly PDF_ROOT=/beegfs/common_crawl/CC-MAIN-2019-35/pdfs
readonly SCRATCH_ROOT=/dev/shm

# Write a date-stamped progress line to stderr.
log() {
  printf '%s %s\n' "$(date)" "$*" >&2
}

# Report a fatal error on stderr and exit with status 1.
die() {
  log "$*"
  exit 1
}

# Print the names of the extracted PDFs, one per line (nothing if none).
# Relies on nullglob being set by the caller.
list_pdfs() {
  local -a pdfs=(*.pdf)
  (( ${#pdfs[@]} == 0 )) || printf '%s\n' "${pdfs[@]}"
}

if (( $# < 3 )) || [[ -z $3 ]]; then
  printf 'Usage: %s HOST JOB TARNAME\n' "${0##*/}" >&2
  exit 2
fi

hn=$1
jn=$2
tfn=$3
work_dir=${SCRATCH_ROOT}/x${hn}/${tfn}

# Unmatched globs expand to nothing, so the arrays below can be counted and
# passed on without ever containing a literal pattern.
shopt -s nullglob

# Everything below (in particular the final "rm") assumes we are inside the
# scratch directory, so failing to get there must be fatal.
if ! mkdir -p -- "$work_dir" || ! cd -- "$work_dir"; then
  die "$hn cannot enter $work_dir for job $jn"
fi

# NOTE(review): GNU tar may need --wildcards for '*.pdf' to be treated as a
# pattern when extracting -- confirm against the tar in use.
# An extraction error is reported but not fatal: whatever was extracted is
# still processed.
if ! tar -xf "${PDF_ROOT}/${tfn}.tar" '*.pdf'; then
  log "$hn tar reported errors extracting ${tfn}.tar for job $jn"
fi
log "$hn $PWD untarred ${tfn}.tar for job $jn"

# Sort on the first "_"-separated field as text, then on the second as a
# number; cat -n prefixes each name with its line number.  Only the exit
# status of plinks.py decides whether the job is aborted.
if ! list_pdfs \
  | sort --field-separator=_ -k1,1 -k2n,2 \
  | cat -n \
  | plinks.py "$tfn"; then
  printf '%s %s aborted job %s for %s [remember to clean up]\n' \
    "$(date)" "$hn" "$jn" "$tfn"
  # The purpose of this flag file is not visible in this script; it is
  # removed on abort exactly as before.
  rm -f "${SCRATCH_ROOT}/stopJob"
  exit 1
fi

# Result files found in the scratch directory (presumably written by
# plinks.py).
bad_files=(badpdfs_*)
link_files=(links_*_*)
log "$hn tarring ${#bad_files[@]}/${#link_files[@]} results from job $jn for $tfn in $PWD"

# Archive only members that exist, so a missing badpdfs file alone does not
# count as a failure.  A real tar failure aborts BEFORE anything is deleted,
# because the scratch copy is then the only copy of the results.
members=()
if [[ -e "badpdfs_${tfn}" ]]; then
  members+=("badpdfs_${tfn}")
fi
members+=(links_"${tfn}"_*)
if (( ${#members[@]} )); then
  tar -cf "${PDF_ROOT}/links/${tfn}.tar" "${members[@]}" \
    || die "$hn failed to write ${PDF_ROOT}/links/${tfn}.tar for job $jn; results left in $PWD"
else
  log "$hn no results to archive from job $jn for $tfn"
fi

# Log what is about to be removed: the badpdfs listing, then the newest and
# the oldest links file (ls -lt lists newest first).
bad_listing=()
if (( ${#bad_files[@]} )); then
  mapfile -t bad_listing < <(ls -lt -- "${bad_files[@]}")
fi
# NOTE(review): this line goes to stdout, not stderr, as in the original --
# confirm whether that is intended.
printf '%s %s rm%s\n' "$(date)" "$PWD" "${bad_listing[*]:+ ${bad_listing[*]}}"

link_listing=()
if (( ${#link_files[@]} )); then
  mapfile -t link_listing < <(ls -lt -- "${link_files[@]}")
fi
if (( ${#link_listing[@]} )); then
  printf '. . . %s\n' "${link_listing[0]}" >&2
  printf '%s\n' "${link_listing[${#link_listing[@]} - 1]}" >&2
else
  printf '. . .\n' >&2
fi

# Remove every (non-dot) entry left in the scratch directory.
leftovers=(*)
if (( ${#leftovers[@]} )); then
  rm -- "${leftovers[@]}"
fi

cd .. || die "$hn cannot leave $work_dir for job $jn"
log "$PWD rmdir ${tfn}"
rmdir -- "$tfn"

printf '%s %s finished job %s for %s\n' "$(date)" "$hn" "$jn" "$tfn"