comparison workers/bin/ptimedWhich.sh @ 47:2a0dab424418

cci path hack changed for 2018.04
author Henry S. Thompson <ht@markup.co.uk>
date Mon, 10 Dec 2018 14:43:18 +0000
parents 7a4e49689935
children b8a88cad75d5
comparison
legend: equal | deleted | inserted | replaced
left: 46:7a4e49689935 | right: 47:2a0dab424418
64 N=$(wc -l< ifile.txt) 64 N=$(wc -l< ifile.txt)
65 export -f tryread lrand 65 export -f tryread lrand
66 while read s 66 while read s
67 do 67 do
68 url="https://commoncrawl.s3.amazonaws.com/$s" 68 url="https://commoncrawl.s3.amazonaws.com/$s"
69 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-) 69 # below for 2018-04, for 2017-04 needs $13 instead of $14
70 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$14}' |tr ' ' \-)
70 echo $url /var/data/$cci 71 echo $url /var/data/$cci
71 done < ifile.txt 2>> $res/errs | \ 72 done < ifile.txt 2>> $res/errs | \
72 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$? 73 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$?
73 echo \# $(date) main loop exit code=$pRes >> $log 74 echo \# $(date) main loop exit code=$pRes >> $log
74 rm .running 75 rm .running