comparison bin/doExtract.sh @ 36:e912ed51146a

fixed scope problem in tar step
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 26 Mar 2020 15:29:12 +0000
parents ec99b2d1d2fc
children ef3533d3ac4b
comparison
equal deleted inserted replaced
35:ec99b2d1d2fc 36:e912ed51146a
20 pfx=$(ls /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/CC-MAIN-*-00000.warc.gz |\ 20 pfx=$(ls /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/CC-MAIN-*-00000.warc.gz |\
21 cut -f 6 -d / | cut -f 3,4 -d -) 21 cut -f 6 -d / | cut -f 3,4 -d -)
22 22
23 cat ../by11s.txt | while read i j 23 cat ../by11s.txt | while read i j
24 do ((n=i/11)) 24 do ((n=i/11))
25 tar -cf /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extract_${n}.tar.gz \ 25 tar -cf /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extract_${n}.tar \
26 $(seq $i $j | xargs -I ^ bash -c '{ k=^; printf "${pfx}-%05.0f_* logs/?_%03.0f_log\n" $k $k ; }') 26 $(seq $i $j | xargs -I ^ bash -c '{ k=^; printf "${0}-%05.0f_* logs/*_%03.0f_log\n" $k $k ; }' $pfx)
27 done && 27 done &&
28 echo $(date) $(hostname) $jobid /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extracts_\{0..${n}\}.tar 28 echo $(date) $(hostname) $jobid /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extracts_\{0..${n}\}.tar
29 fi 29 fi