Mercurial > hg > cc > azure
comparison workers/bin/ptimedWhich.sh @ 49:b8a88cad75d5
revert cci pattern
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Sat, 15 Dec 2018 10:34:14 +0000 |
parents | 2a0dab424418 |
children |
comparison
legend: equal, deleted, inserted, replaced
48:3b951980206d | 49:b8a88cad75d5 |
---|---|
64 N=$(wc -l< ifile.txt) | 64 N=$(wc -l< ifile.txt) |
65 export -f tryread lrand | 65 export -f tryread lrand |
66 while read s | 66 while read s |
67 do | 67 do |
68 url="https://commoncrawl.s3.amazonaws.com/$s" | 68 url="https://commoncrawl.s3.amazonaws.com/$s" |
69 # below for 2018-04, for 2017-04 needs $13 instead of $14 | 69 # below for 2014-04, 2017-04: for 2018-04, needs $14 instead of $13 |
70 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$14}' |tr ' ' \-) | 70 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-) |
71 echo $url /var/data/$cci | 71 echo $url /var/data/$cci |
72 done < ifile.txt 2>> $res/errs | \ | 72 done < ifile.txt 2>> $res/errs | \ |
73 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$? | 73 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$? |
74 echo \# $(date) main loop exit code=$pRes >> $log | 74 echo \# $(date) main loop exit code=$pRes >> $log |
75 rm .running | 75 rm .running |