annotate bin/doPlinks.sh @ 138:9ea12f7b304b

just barely working
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 23 Jul 2021 16:23:46 +0000
parents b0d9fe66ce8a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
65a56c0d1c1f bolting the barn door...
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/bash
6
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
2 hn="$1"  # first argument: tag used in the /dev/shm/x<hn> scratch path and in every log line
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
3 jn="$2"  # second argument: job identifier, only echoed in the log lines
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
4 tfn="${3:?usage: doPlinks.sh hn jn tfn}"  # third argument: base name of the pdf tar file; abort when missing so the later 'rm *' can never run outside a per-tar scratch directory
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
5
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
6 mkdir -p "/dev/shm/x${hn}/${tfn}" || exit 1  # scratch directory for this tar file; quoted against word splitting, and give up if it cannot be created
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
7 cd "/dev/shm/x${hn}/${tfn}" || exit 1  # never carry on in the wrong directory: the script later runs 'rm *' in the current directory
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
8 tar -xf "/beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar" '*.pdf' || echo $(date) $hn tar -xf exited with status $? for ${tfn}.tar, job $jn 1>&2  # report a failed extraction instead of ignoring it (still continues, as before). NOTE(review): GNU tar only globs member names when --wildcards is given; confirm '*.pdf' selects what is intended
7
25ca3505b4d7 more logging
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 6
diff changeset
9 echo $(date) ${hn} $(pwd) untarred ${tfn}.tar for job ${jn} >&2  # progress message on stderr
6
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
10 ls *.pdf | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\
11
b0d9fe66ce8a give up on mpiexec_mpt
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 9
diff changeset
11 $HOME/bin/plinks.py $tfn || { echo $(date) $hn aborted job $jn for $tfn \[remember to clean up\] ; rm -f /dev/shm/x$hn/${tfn}/stopJob ; exit 1 ; }  # the *.pdf names, sorted on the first '_'-separated field and then numerically on the second, are numbered by cat -n and piped to plinks.py; if plinks.py fails, report the abort on stdout, remove the stopJob file and exit 1 without cleaning the scratch directory
7
25ca3505b4d7 more logging
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 6
diff changeset
12 echo $(date) ${hn} tarring $(ls badpdfs_* | wc -l)/$(ls links_*_* | wc -l) results from job ${jn} for ${tfn} in $(pwd) >&2  # logs the badpdfs_* / links_*_* file counts on stderr
6
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
13 tar -cf "/beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links/${tfn}.tar" "badpdfs_${tfn}" "links_${tfn}_"* || { echo $(date) $hn failed to tar results from job $jn for $tfn, leaving them in $(pwd) 1>&2 ; exit 1 ; }  # stop here when archiving fails: the 'rm *' that follows would otherwise delete the only copy of the results
9
7a93e190c74d logging tweaks, preparing for timeout on problem pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 8
diff changeset
14 echo $(date) $(pwd) rm $(ls -lt badpdfs_*) >&2  # logs the long listing of the badpdfs_* files on stderr before the directory is emptied
6
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
15 rm -- *  # empty the scratch directory; '--' keeps a file name starting with '-' from being parsed as an rm option
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
16 cd .. || exit 1  # the rmdir that follows uses a relative name, so it must run from the parent directory
8
3b56c2c9d0ee longer run, terser logging
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 7
diff changeset
17 echo $(date) $(pwd) rmdir ${tfn} >&2  # announces the directory removal on stderr
6
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
18 rmdir -- "${tfn}"  # remove the now-empty scratch directory; quoted so the name is passed as exactly one argument
0f494c76a887 refactor to address tarred-up pdfs
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 1
diff changeset
19 echo $(date) ${hn} finished job ${jn} for ${tfn}  # completion message, written to stdout