comparison bin/doHdr.sh @ 15:a96fb2c26c80

works after minor tweaks
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 26 Feb 2020 16:02:22 +0000
parents 9a1de2c4ffe3
children
comparison
equal deleted inserted replaced
14:9a1de2c4ffe3 15:a96fb2c26c80
6 mkdir -p /dev/shm/x$hn/${tfn} 6 mkdir -p /dev/shm/x$hn/${tfn}
7 cd /dev/shm/x$hn/${tfn} 7 cd /dev/shm/x$hn/${tfn}
8 tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.hdr' 8 tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.hdr'
9 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2 9 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2
10 ls *.hdr | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\ 10 ls *.hdr | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\
11 while read n f; do echo "$n\t"$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2 -d ' ')"\t"$(egrep -ia '^Last-Modified: ' $f|cut -f 2 -d ' '); done >> hdrs_$tfn.tsv 11 while read n f; do echo "$n "$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2- -d ' ')" "$(egrep -ia '^Last-Modified: ' $f|cut -f 2- -d ' '); done >> hdrs_$tfn.tsv
12 echo $(date) $hn moving $(ls hdrs_*.tsv|wc -l) results from job $jn for $tfn in $(pwd) 1>&2 12 echo $(date) $hn moving $(ls hdrs_*.tsv|wc -l) results from job $jn for $tfn in $(pwd) 1>&2
13 mv hdrs_$tfn.tsv /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/hdrs 13 mv hdrs_$tfn.tsv /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/hdrs
14 echo $(date) $(pwd) rm $(ls -lt hdrs_*.tsv) 1>&2
15 rm * 14 rm *
16 cd .. 15 cd ..
17 echo $(date) $(pwd) rmdir ${tfn} 1>&2 16 echo $(date) $(pwd) rmdir ${tfn} 1>&2
18 rmdir ${tfn} 17 rmdir ${tfn}
19 echo $(date) $hn finished job ${jn} for ${tfn} 18 echo $(date) $hn finished job ${jn} for ${tfn}