Mercurial > hg > cc > cirrus_home
comparison bin/doExtract.sh @ 36:e912ed51146a
fixed scope problem in tar step
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 26 Mar 2020 15:29:12 +0000 |
parents | ec99b2d1d2fc |
children | ef3533d3ac4b |
comparison
equal
deleted
inserted
replaced
35:ec99b2d1d2fc | 36:e912ed51146a |
---|---|
20 pfx=$(ls /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/CC-MAIN-*-00000.warc.gz |\ | 20 pfx=$(ls /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/CC-MAIN-*-00000.warc.gz |\ |
21 cut -f 6 -d / | cut -f 3,4 -d -) | 21 cut -f 6 -d / | cut -f 3,4 -d -) |
22 | 22 |
23 cat ../by11s.txt | while read i j | 23 cat ../by11s.txt | while read i j |
24 do ((n=i/11)) | 24 do ((n=i/11)) |
25 tar -cf /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extract_${n}.tar.gz \ | 25 tar -cf /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extract_${n}.tar \ |
26 $(seq $i $j | xargs -I ^ bash -c '{ k=^; printf "${pfx}-%05.0f_* logs/?_%03.0f_log\n" $k $k ; }') | 26 $(seq $i $j | xargs -I ^ bash -c '{ k=^; printf "${0}-%05.0f_* logs/*_%03.0f_log\n" $k $k ; }' $pfx) |
27 done && | 27 done && |
28 echo $(date) $(hostname) $jobid /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extracts_\{0..${n}\}.tar | 28 echo $(date) $(hostname) $jobid /beegfs/common_crawl/CC-MAIN-${ccid}/${segid}/extracts_\{0..${n}\}.tar |
29 fi | 29 fi |