Mercurial > hg > cc > azure
comparison workers/bin/ptimedWhich.sh @ 47:2a0dab424418
cci path hack changed for 2018.04
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 10 Dec 2018 14:43:18 +0000 |
parents | 7a4e49689935 |
children | b8a88cad75d5 |
comparison
equal
deleted
inserted
replaced
46:7a4e49689935 | 47:2a0dab424418 |
---|---|
64 N=$(wc -l< ifile.txt) | 64 N=$(wc -l< ifile.txt) |
65 export -f tryread lrand | 65 export -f tryread lrand |
66 while read s | 66 while read s |
67 do | 67 do |
68 url="https://commoncrawl.s3.amazonaws.com/$s" | 68 url="https://commoncrawl.s3.amazonaws.com/$s" |
69 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-) | 69 # below for 2018-04, for 2017-04 needs $13 instead of $14 |
 | 70 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$14}' |tr ' ' \-) |
70 echo $url /var/data/$cci | 71 echo $url /var/data/$cci |
71 done < ifile.txt 2>> $res/errs | \ | 72 done < ifile.txt 2>> $res/errs | \ |
72 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$? | 73 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$? |
73 echo \# $(date) main loop exit code=$pRes >> $log | 74 echo \# $(date) main loop exit code=$pRes >> $log |
74 rm .running | 75 rm .running |