Mercurial > hg > cc > cirrus_home
annotate bin/doHdr.sh @ 15:a96fb2c26c80
works after minor tweaks
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 26 Feb 2020 16:02:22 +0000 |
parents | 9a1de2c4ffe3 |
children |
rev | line source |
---|---|
#!/usr/bin/bash
# doHdr.sh -- extract selected header fields from the *.hdr members of one
# Common Crawl PDF tar file and deposit them as a single hdrs_<TARNAME>.tsv.
#
# Usage: doHdr.sh HOST JOB TARNAME
#   HOST     tag used to name the scratch area /dev/shm/x<HOST> and in log lines
#   JOB      job identifier; appears only in log lines
#   TARNAME  name (without .tar) of a tar file under $pdf_dir
#
# Output: one line per .hdr file, appended to hdrs_<TARNAME>.tsv:
#     <n> <X-HST-Target-URI value> <Last-Modified value>
#   where <n> is the file's position after sorting the file names on their
#   first '_'-separated field (lexically) and second field (numerically).
#   NOTE(review): the file is named .tsv but the separator shown in the
#   source is a single space -- confirm against the consumers of these files.
#
# Progress messages go to stderr; the final "finished" line goes to stdout.
# Exits non-zero (with a message on stderr) if any critical step fails.

readonly pdf_dir=/beegfs/common_crawl/CC-MAIN-2019-35/pdfs

# Print an error message on stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Timestamped progress message on stderr.
log() {
  echo "$(date)" "$@" 1>&2
}

# hdr_value GREP_FLAGS PATTERN FILE
# Print everything after the first space of each line of FILE matching
# PATTERN, with all matches joined and runs of blanks/newlines collapsed to
# single spaces.  The words are split with `read -a`, which (unlike an
# unquoted command substitution) never performs pathname expansion, so a
# URL containing '?' or '*' is emitted verbatim.
hdr_value() {
  local -a words=()
  # With -d '' read consumes the whole input and returns non-zero at EOF
  # even though the array has been filled; that status is expected.
  read -r -d '' -a words < <(grep -E "$1" -e "$2" -- "$3" | cut -f 2- -d ' ') || true
  printf '%s' "${words[*]}"
}

(( $# >= 3 )) || die "usage: ${0##*/} HOST JOB TARNAME"
hn=$1
jn=$2
tfn=$3
# An empty TARNAME would make the scratch directory the shared parent, and a
# '/' in it breaks the tar path and the final directory removal.
[[ -n "$tfn" && "$tfn" != */* ]] || die "invalid tar name: '$tfn'"

readonly scratch_parent="/dev/shm/x$hn"
readonly work_dir="$scratch_parent/$tfn"

mkdir -p -- "$work_dir" || die "cannot create $work_dir"
# Everything below deletes files relative to the current directory, so a
# failed cd must stop the script.
cd -- "$work_dir" || die "cannot cd to $work_dir"

# NOTE(review): GNU tar only treats '*.hdr' as a pattern when --wildcards is
# in effect; left as in the original, which is reported to work -- confirm.
if ! tar -xf "$pdf_dir/${tfn}.tar" '*.hdr'; then
  cd -- "$scratch_parent" && rm -rf -- "${work_dir:?}"
  die "tar extraction failed for $pdf_dir/${tfn}.tar"
fi
log "$hn" "$(pwd)" untarred "${tfn}.tar" for job "$jn"

# Collect the header files with a glob rather than by parsing ls output.
shopt -s nullglob
hdr_files=(*.hdr)
shopt -u nullglob

if (( ${#hdr_files[@]} > 0 )); then
  printf '%s\n' "${hdr_files[@]}"
fi \
  | sort --field-separator=_ -k1,1 -k2n,2 \
  | cat -n \
  | while read -r n f; do
      printf '%s %s %s\n' "$n" \
        "$(hdr_value -a '^X-HST-Target-URI: ' "$f")" \
        "$(hdr_value -ia '^Last-Modified: ' "$f")"
    done >> "hdrs_$tfn.tsv"

shopt -s nullglob
result_files=(hdrs_*.tsv)
shopt -u nullglob
log "$hn" moving "${#result_files[@]}" results from job "$jn" for "$tfn" in "$(pwd)"

# Do not clean up unless the results are safely on shared storage.
mv -- "hdrs_$tfn.tsv" "$pdf_dir/hdrs" \
  || die "could not move hdrs_$tfn.tsv to $pdf_dir/hdrs; results left in $work_dir"

cd .. || die "cannot leave $work_dir"
log "$(pwd)" rmdir "${tfn}"
# Remove the scratch directory by absolute path instead of `rm *` + rmdir.
rm -rf -- "${work_dir:?}" || die "could not remove $work_dir"

echo "$(date)" "$hn" finished job "${jn}" for "${tfn}"