changeset 15:a96fb2c26c80

works after minor tweaks
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 26 Feb 2020 16:02:22 +0000
parents 9a1de2c4ffe3
children 47ef882acbec
files bin/doHdr.sh hdrJob.sh
diffstat 2 files changed, 2 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/bin/doHdr.sh	Wed Feb 26 15:47:20 2020 +0000
+++ b/bin/doHdr.sh	Wed Feb 26 16:02:22 2020 +0000
@@ -8,10 +8,9 @@
 tar -xf /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/${tfn}.tar '*.hdr'
 echo $(date) $hn $(pwd) untarred ${tfn}.tar for job $jn 1>&2
 ls *.hdr | sort --field-separator=_ -k1,1 -k2n,2 | cat -n |\
-while read n f; do echo "$n\t"$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2 -d ' ')"\t"$(egrep -ia '^Last-Modified: ' $f|cut -f 2 -d ' '); done >> hdrs_$tfn.tsv
+while read n f; do echo "$n	"$(egrep -a '^X-HST-Target-URI: ' $f|cut -f 2- -d ' ')"	"$(egrep -ia '^Last-Modified: ' $f|cut -f 2- -d ' '); done >> hdrs_$tfn.tsv
 echo $(date) $hn moving $(ls hdrs_*.tsv|wc -l) results from job $jn for $tfn in $(pwd) 1>&2
 mv hdrs_$tfn.tsv /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/hdrs
-echo $(date) $(pwd) rm $(ls -lt hdrs_*.tsv) 1>&2
 rm *
 cd ..
 echo $(date) $(pwd) rmdir ${tfn} 1>&2
--- a/hdrJob.sh	Wed Feb 26 15:47:20 2020 +0000
+++ b/hdrJob.sh	Wed Feb 26 16:02:22 2020 +0000
@@ -5,7 +5,7 @@
 #PBS -l walltime=08:00:00
 #PBS -V
 #PBS -A dc007
-#PBS -N plinks
+#PBS -N hdr
 
 #module load mpt