Mercurial > hg > cc > cirrus_home
comparison bin/doPlinks.sh @ 6:0f494c76a887
refactor to address tarred-up pdfs
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 23 Feb 2020 16:48:34 +0000 |
parents | a4b0359456bc |
children | 25ca3505b4d7 |
comparison
equal
deleted
inserted
replaced
5:a28d731977da | 6:0f494c76a887 |
---|---|
1 #!/usr/bin/bash | 1 #!/usr/bin/bash |
2 mkdir -p /dev/shm/x | 2 hn=$1 |
3 plinks.py $1 | 3 jn=$2 |
4 # while read f | 4 tfn=$3 |
5 # do | 5 |
6 # if plinks.py $f > /dev/shm/x/links_${me}_${mine} 2>/dev/null | 6 mkdir -p /dev/shm/x$hn/${tfn} |
7 # then | 7 cd /dev/shm/x$hn/${tfn} |
8 # ((mine+=1)) | 8 tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.pdf' |
9 # else | 9 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn |
10 # echo $f >> /dev/shm/x/badpdfs_$me | 10 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\ |
11 # rm -f /dev/shm/x/links_${me}_${mine} | 11 plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/stopJob ; exit 1 ; } |
12 # fi | 12 echo $(date) $hn tarring results from job $jn for $tfn in $(pwd) |
13 # done | 13 tar -cf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar badpdfs_${tfn} links_${tfn}_* |
14 rsync -a /dev/shm/x/ links | 14 rm * |
| 15 cd .. |
| 16 rmdir ${tfn} |
| 17 echo $(date) $hn finished job ${jn} for ${tfn} |