# HG changeset patch
# User Henry S. Thompson
# Date 1544452998 0
# Node ID 2a0dab424418854a9c866205b9c5ca7e1230bb5f
# Parent  7a4e49689935e90a4f41e75a31593837bdad5388
cci path hack changed for 2018.04

diff -r 7a4e49689935 -r 2a0dab424418 workers/bin/ptimedWhich.sh
--- a/workers/bin/ptimedWhich.sh	Mon Dec 03 21:10:02 2018 +0000
+++ b/workers/bin/ptimedWhich.sh	Mon Dec 10 14:43:18 2018 +0000
@@ -66,7 +66,8 @@
 while read s
 do
  url="https://commoncrawl.s3.amazonaws.com/$s"
- cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-)
+ # below for 2018-04, for 2017-04 needs $13 instead of $14
+ cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$14}' |tr ' ' \-)
  echo $url /var/data/$cci
 done < ifile.txt 2>> $res/errs | \
 parallel --pipe -N$((N / wp)) -j $wp "bash -c \"tryread 2>>$res/errs{#}\"" 2>>$res/errs || pRes=$?