Mercurial > hg > cc > azure
changeset 47:2a0dab424418
cci path hack changed for 2018.04
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 10 Dec 2018 14:43:18 +0000 |
parents | 7a4e49689935 |
children | 3b951980206d |
files | workers/bin/ptimedWhich.sh |
diffstat | 1 files changed, 2 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/workers/bin/ptimedWhich.sh Mon Dec 03 21:10:02 2018 +0000
+++ b/workers/bin/ptimedWhich.sh Mon Dec 10 14:43:18 2018 +0000
@@ -66,7 +66,8 @@
 while read s
 do
   url="https://commoncrawl.s3.amazonaws.com/$s"
-  cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-)
+  # below for 2018-04, for 2017-04 needs $13 instead of $14
+  cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$14}' |tr ' ' \-)
   echo $url /var/data/$cci
 done < ifile.txt 2>> $res/errs | \
   parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$?
```