Mercurial > hg > cc > cirrus_home
view bin/doHdr.sh @ 106:815b33c3254a
working with -x and rich directory structure
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 20 Apr 2021 12:26:09 +0000 |
parents | a96fb2c26c80 |
children |
line wrap: on
line source
#!/usr/bin/bash
#
# doHdr.sh - pull selected HTTP header fields out of one Common Crawl PDF tar.
#
# Usage: doHdr.sh HOST JOB TARNAME
#   HOST     tag used to name the scratch area (<work root>/xHOST/TARNAME)
#   JOB      job identifier; only appears in log messages
#   TARNAME  archive name without ".tar"; read from <pdf dir>/TARNAME.tar
#
# The *.hdr members of the archive are unpacked into the scratch directory.
# The header files are ordered by the text before the first "_" and then
# numerically by what follows it. For each one, a line is appended to
# hdrs_TARNAME.tsv:
#
#   <sequence number> <X-HST-Target-URI value(s)> <Last-Modified value(s)>
#
# Fields are separated by single spaces. The file is then moved into
# <pdf dir>/hdrs/ and the scratch directory is emptied and removed.
#
# Optional environment overrides:
#   DOHDR_PDF_DIR    directory holding the tar files
#   DOHDR_WORK_ROOT  parent of the scratch area
#
# Progress messages go to stderr; the final "finished" line goes to stdout.
#
# Failures are checked explicitly instead of using "set -e": grep exits with
# status 1 whenever a header is simply absent, which must not stop the run.

readonly PDF_DIR=${DOHDR_PDF_DIR:-/beegfs/common_crawl/CC-MAIN-2019-35/pdfs}
readonly WORK_ROOT=${DOHDR_WORK_ROOT:-/dev/shm}

# Print an error message on stderr and terminate with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Print $1 with every run of blanks, tabs and newlines collapsed to a single
# space and both ends trimmed. This is the whitespace handling that unquoted
# expansion would give, but the text is never treated as a glob pattern
# (URIs routinely contain "?", "*" or "[").
squash() {
  local -a words=()
  # With a NUL delimiter read reports end-of-input as failure; the array is
  # filled regardless.
  read -r -d '' -a words <<<"$1" || true
  printf '%s' "${words[*]-}"
}

# Print a timestamped message on stdout; callers redirect to stderr as needed.
log() {
  printf '%s %s\n' "$(squash "$(date)")" "$*"
}

# List the *.hdr files of the current directory, one per line, ordered by the
# part before the first "_" and then numerically by the part after it.
list_headers() {
  local f
  for f in *.hdr; do
    [[ -e ${f} ]] || continue   # unmatched pattern stays literal: skip it
    printf '%s\n' "${f}"
  done | sort --field-separator=_ -k1,1 -k2n,2
}

# Print the number of hdrs_*.tsv files in the current directory.
count_results() {
  local f count=0
  for f in hdrs_*.tsv; do
    [[ -e ${f} ]] && count=$((count + 1))
  done
  printf '%s' "${count}"
}

# Empty and remove the scratch directory $1 (logged under the name $2).
clean_up() {
  local workdir=$1 tfn=$2
  # Absolute path plus ":?" so that nothing outside the scratch directory can
  # be deleted, whatever the current directory happens to be.
  rm -f -- "${workdir:?}"/*
  cd -- "${workdir}/.." || die "cannot leave ${workdir}"
  log "${PWD} rmdir ${tfn}" >&2
  # Not fatal: rmdir reports its own error if anything was left behind.
  rmdir -- "${workdir}"
}

main() {
  if (( $# < 3 )) || [[ -z $1 || -z $2 || -z $3 ]]; then
    printf 'Usage: %s HOST JOB TARNAME\n' "${0##*/}" >&2
    exit 2
  fi

  local hn=$1 jn=$2 tfn=$3
  local tarball=${PDF_DIR}/${tfn}.tar
  local workdir=${WORK_ROOT}/x${hn}/${tfn}
  local out=hdrs_${tfn}.tsv

  mkdir -p -- "${workdir}" || die "cannot create ${workdir}"
  # Extraction and the relative file names used below depend on this being
  # the current directory, so a failure here must stop the script.
  cd -- "${workdir}" || die "cannot cd to ${workdir}"

  # NOTE(review): GNU tar treats '*.hdr' as a pattern on extraction only when
  # --wildcards is in effect (for instance through TAR_OPTIONS) - confirm for
  # the tar installed on the target hosts.
  if ! tar -xf "${tarball}" '*.hdr'; then
    log "${hn} ${PWD} could not untar ${tfn}.tar for job ${jn}" >&2
    clean_up "${workdir}" "${tfn}"
    exit 1
  fi
  log "${hn} ${PWD} untarred ${tfn}.tar for job ${jn}" >&2

  local n=0 f uri modified
  while IFS= read -r f; do
    n=$((n + 1))
    # -a: header files may contain bytes that make grep think they are binary.
    uri=$(grep -a -E '^X-HST-Target-URI: ' -- "${f}" | cut -d ' ' -f 2-)
    modified=$(grep -i -a -E '^Last-Modified: ' -- "${f}" | cut -d ' ' -f 2-)
    printf '%s %s %s\n' "${n}" "$(squash "${uri}")" "$(squash "${modified}")"
  done < <(list_headers) >>"${out}"

  log "${hn} moving $(count_results) results from job ${jn} for ${tfn} in ${PWD}" >&2
  # The trailing slash makes mv fail instead of creating a plain file called
  # "hdrs" when the results directory is missing. Stop before the clean-up
  # on failure so the results stay in the scratch directory.
  mv -- "${out}" "${PDF_DIR}/hdrs/" \
    || die "cannot move ${out} to ${PDF_DIR}/hdrs/; results kept in ${workdir}"

  clean_up "${workdir}" "${tfn}"
  log "${hn} finished job ${jn} for ${tfn}"
}

main "$@"