# HG changeset patch # User Henry S. Thompson # Date 1540051889 0 # Node ID d4f186655bcc0ec492a0f3923a9b85f0892519b0 # Parent 9631fca89cc6ab420916e03b88b8a20fb0447046 lots of tweaking, reached the 80/20 point diff -r 9631fca89cc6 -r d4f186655bcc workers/bin/_timedWhich.sh --- a/workers/bin/_timedWhich.sh Fri Oct 19 14:25:19 2018 +0000 +++ b/workers/bin/_timedWhich.sh Sat Oct 20 16:11:29 2018 +0000 @@ -2,6 +2,6 @@ egrep -o '("WARC-Target-URI":"https?:|"Last-Modified":"[^"]*")'|\ egrep -o '(https?:|:".*"$)' |\ tr '\012' \# | sed 's/:#:/ /g'|tr \# '\012' | tr -d \"|\ - sed 's/ [[:digit:]][[:digit:]]\?:[[:digit:]][[:digit:]]:[[:digit:]][[:digit:]] / /;s/\(https\? \)\(: \)\?[MTWFSa-z]..\.\?, \?/\1/;s/ \([-+][[:digit:]]\{4\}\|[[:upper:]]\{2,3\}\)$//;s/ [[:digit:]]\{1,2\} / /;s/\/[[:digit:]]\{1,2\}\/\([[:digit:]]\{4\}\)$/ \1/'|\ +sed ';s/gmt//ig;s/ [[:digit:]][[:digit:]]\?:[[:digit:]][[:digit:]]:[[:digit:]][[:digit:]]\(\.[[:digit:]]*\)\?\b//;s/^\(https\? \)\(: \)/\1/;s/ [MTWFSa-z]..\.\?, \?/ /;s/\( [[:upper:]][[:alnum:]]\{1,3\}\)\{1,2\}$//;s/ [-+][[:digit:]]\{4\}\b//;s/ [[:digit:]]\{1,2\} / /;s/ [[:upper:]][[:alnum:]]*\/[[:upper:]][[:alnum:]]*$//;s/ \+$//'|\ awk '{c[$0]+=1} END {for (k in c) {print k, c[k]}}'