# HG changeset patch # User Henry S. Thompson # Date 1582732942 0 # Node ID a96fb2c26c8033ce48a23f24971a915afb97cdf1 # Parent 9a1de2c4ffe3c54a80faf32c63a26d3aac7f90b3 works after minor tweaks diff -r 9a1de2c4ffe3 -r a96fb2c26c80 bin/doHdr.sh --- a/bin/doHdr.sh Wed Feb 26 15:47:20 2020 +0000 +++ b/bin/doHdr.sh Wed Feb 26 16:02:22 2020 +0000 @@ -8,10 +8,9 @@ tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.hdr' echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2 ls *.hdr | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\ -while read n f; do echo "$n\t"$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2 -d ' ')"\t"$(egrep -ia '^Last-Modified: ' $f|cut -f 2 -d ' '); done >> hdrs_$tfn.tsv +while read n f; do echo "$n "$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2- -d ' ')" "$(egrep -ia '^Last-Modified: ' $f|cut -f 2- -d ' '); done >> hdrs_$tfn.tsv echo $(date) $hn moving $(ls hdrs_*.tsv|wc -l) results from job $jn for $tfn in $(pwd) 1>&2 mv hdrs_$tfn.tsv /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/hdrs -echo $(date) $(pwd) rm $(ls -lt hdrs_*.tsv) 1>&2 rm * cd .. echo $(date) $(pwd) rmdir ${tfn} 1>&2 diff -r 9a1de2c4ffe3 -r a96fb2c26c80 hdrJob.sh --- a/hdrJob.sh Wed Feb 26 15:47:20 2020 +0000 +++ b/hdrJob.sh Wed Feb 26 16:02:22 2020 +0000 @@ -5,7 +5,7 @@ #PBS -l walltime=08:00:00 #PBS -V #PBS -A dc007 -#PBS -N plinks +#PBS -N hdr #module load mpt