annotate bin/warc.sh @ 0:fdd3f8a16fd4 default tip

shared scripts on valhalla cluster
author Henry Thompson <ht@markup.co.uk>
date Sat, 14 Mar 2020 11:00:58 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
#!/bin/bash
# Try to fillet warc payloads with just a shell script
#
# Usage: warc.sh outfilePrefix [-n startnum] [contentType] < warcfile
#   outfilePrefix  prefix for the generated files: <prefix>_<N>.hdr for the
#                  payload headers and <prefix>_<N>[.pdf|.html] for the body
#   -n startnum    number of the last record already done; the first response
#                  record found is numbered startnum+1 (default 0)
#   contentType    if given, only payloads whose Content-Type equals this
#                  value are written out; all others are skipped
# The WARC data is read line by line from standard input.

LANG=C   # count bytes
LC_ALL=C # count bytes
IFS=$'\n'              # only newlines split unquoted expansions
shopt -qs nocasematch  # header names are matched case-insensitively
shopt -qs extglob      # enables the *( ... ) patterns used for trimming
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
10
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
handle_body () {
  # Copy the next $1 bytes from stdin to stdout.
  #
  # Arguments: $1 - number of body bytes to copy
  # Outputs:   the bytes on stdout; a "truncated" diagnostic on stderr when
  #            head reports a non-zero exit status
  #
  ## read -r -N $l L doesn't work for binary bodies that contain a \000 because of Bash 'feature'
  # NOTE(review): GNU head appears to exit 0 when the input is merely shorter
  # than requested, so this check probably only fires on real I/O errors --
  # confirm before relying on it to detect short bodies.
  local nbytes=$1
  local rc
  head -c "$nbytes"
  rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "truncated \$? = $rc" 1>&2
  fi
}
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
20
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
handle_payload () {
  # Split one response payload (HTTP header lines + body) read from stdin.
  #
  # Arguments:
  #   $1 - record number, used in the output file names
  #   $2 - payload length in bytes (header lines plus body)
  #   $3 - content-type filter; when non-empty, payloads of any other type
  #        are consumed from stdin but not written
  #   $4 - truncation reason from the WARC header (may be empty)
  #   $5 - target URI from the WARC header
  # Globals:
  #   pprefix (read) - output file name prefix
  #   tot     (read) - stream offset, only used in diagnostics
  #   cec     (written) - associative array counting content encodings
  # Outputs:
  #   ${pprefix}_N.hdr      header lines plus X-HST-* lines
  #   ${pprefix}_N<suffix>  body (suffix .pdf, .html or none), via handle_body
  #   progress and warnings on stderr
  # Header names are matched case-insensitively only if the caller has
  # enabled nocasematch.
  local n=$1 l=$2 ol=$2 f=$3 tr=$4 tu=$5
  local t=' Unknown'
  local z='' bl='' xl='' xx='' hdr='' s='' key=''
  local hn=0
  local L

  # IFS= keeps read from trimming anything, so ${#L}+1 is the byte length of
  # the line including its newline (the script runs with LC_ALL=C).
  while IFS= read -r L; do
    (( l -= ${#L} + 1 ))
    # Lines are CRLF-terminated; store them without the trailing CR.
    hdr+=$'\n'${L%$'\r'}
    (( hn += 1 ))
    case "$L" in
      Content-Type:\ *)
        # Value starts after the FIRST ": "; drop parameters after ';' and
        # any trailing whitespace (including the CR).
        t=${L#*: }
        t=${t%%;*}
        t=${t%"${t##*[![:space:]]}"}
        ;;
      Content-Length:\ *)
        bl=${L#*: }
        bl=${bl%"${bl##*[![:space:]]}"}
        ;;
      X-Crawler-Content-Length:\ *) # introduced btw 2015&2018???
        xl=${L#*: }
        xl=${xl%"${xl##*[![:space:]]}"}
        ;;
      X-Crawler-Content-Encoding:\ *|Content-Encoding:\ *) # one or the other, change btw 2015&2018???
        z=${L#*: }
        key=${z%"${z##*[![:space:]]}"}
        # The value comes from crawled data: update the counter by plain
        # assignment rather than inside (( )), where the subscript text would
        # be evaluated as part of an arithmetic expression. An empty value
        # is not a valid subscript, so it is not counted.
        if [[ -n "$key" ]]; then
          cec[$key]=$(( ${cec[$key]:-0} + 1 ))
        fi
        ;;
      $'\r')
        # A line holding only CR ends the headers; l is now the body length.
        if (( l > 0 )); then
          if [[ -n "$f" && "$f" != "$t" ]]; then
            echo "$t != $f, skipping starting at $((tot + (ol - l)))" 1>&2
            head -c "$l" >/dev/null
            return
          fi
          # The crawler-supplied length, when present, replaces Content-Length
          # for the comparison; xx marks that in the message.
          if [[ -n "$xl" ]]; then
            bl=$xl
            xx=x
          fi
          case "$t" in
            application/pdf) s=.pdf ;;
            text/html) s=.html ;;
            *) s='' ;;
          esac
          if [[ -n "$bl" ]]; then
            # Only report a mismatch when no content encoding was declared.
            if [ "$bl" -ne "$l" ] && [ -z "$z" ]; then
              echo "length mismatch$xx: $n here: $l given: $bl trunc:${tr:+ $tr}" 1>&2
            fi
          fi
          echo "reading $l bytes into ${pprefix}_$n$s as $t starting at $((tot + (ol - l)))" 1>&2
          {
            # hdr starts with an empty line (dropped by tail) and ends with
            # the blank terminator line.
            # NOTE(review): head -n hn-1 keeps lines 1..hn-2 only, so the last
            # real header line is dropped as well -- kept as in the original;
            # confirm whether that is intended.
            printf '%s\n' "$hdr" | head -n "$((hn - 1))" | tail -n +2
            if [[ -n "$tr" ]]; then
              echo "X-HST-Truncated: $tr"
            fi
            echo "X-HST-Target-URI: $tu"
          } > "${pprefix}_$n.hdr"
          handle_body "$l" > "${pprefix}_$n$s"
        else
          echo "empty body, skipping" 1>&2
        fi
        return
        ;;
    esac
  done
}
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
91
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
handle_resp () {
  # Read the remaining WARC header lines of a response record from stdin,
  # then hand the payload that follows to handle_payload.
  #
  # Arguments:
  #   $1 - record number
  #   $2 - content-type filter, passed through to handle_payload
  # Globals:
  #   tot (written) - advanced by every header line read and by the payload
  #                   length taken from the WARC Content-Length header
  # Header names are matched case-insensitively only if the caller has
  # enabled nocasematch.
  local n=$1 f=$2
  # Local, so values from a previous record cannot leak into this one.
  local l='' ll='' tr='' tu=''
  local L

  while IFS= read -r L; do
    tot=$((tot + ${#L} + 1))
    case "$L" in
      Content-Length:\ *)
        l=${L#*: }
        ;;
      WARC-Truncated:\ *)
        tr=${L#*: }
        tr=${tr%"${tr##*[![:space:]]}"}
        tr=${tr:-EMPTY}
        ;;
      WARC-Target-URI:\ *)
        # Cut at the FIRST ": " so a URI that itself contains ": " survives.
        tu=${L#*: }
        tu=${tu%"${tu##*[![:space:]]}"}
        ;;
      $'\r')
        # A line holding only CR ends the WARC headers.
        # Strip trailing whitespace from the length: the \r has to go.
        ll=${l%"${l##*[![:space:]]}"}
        handle_payload "$n" "$ll" "$f" "${tr# }" "${tu# }"
        tot=$((tot + ll))
        return
        ;;
    esac
  done
}
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
122
fdd3f8a16fd4 shared scripts on valhalla cluster
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
# outer loop
# Scan the WARC stream on stdin line by line. Each "WARC-Type: response"
# header line starts a response record, which is passed to handle_resp.
# Per-encoding counts collected in cec are reported on stderr at the end.
if [ "$#" -lt 1 ]; then
  echo "Usage: ${0##*/} outfilePrefix [-n startnum] [contentType] < warcfile" 1>&2
  exit 2
fi
pprefix=$1
shift
if [ "$1" = "-n" ]; then
  n=${2:-0}
  shift; shift
else
  n=0
fi
tot=0   # running count of bytes consumed from stdin
c=0     # other non-blank lines seen since the last response record
f=$1    # optional content-type filter (empty: accept everything)
wc=0    # WARC/1.0 lines seen since the last response record
declare -A cec   # content encoding -> number of occurrences
while IFS= read -r L; do
  tot=$((tot + ${#L} + 1))
  # Lines are CRLF-terminated; match on the line without its trailing CR.
  case ${L%$'\r'} in
    WARC/1.0)
      if (( wc == 0 && c > 0 )); then
        echo "WARC/1.0 after $c non-blank lines record $n char $tot" 1>&2
      fi
      ((wc++))
      ;;
    "")
      :
      ;;
    WARC-Type:\ response)
      echo "tot at resp prop: $tot" 1>&2
      handle_resp "$((n = n + 1))" "$f"
      c=0
      wc=0
      ;;
    *)
      c=$((c + 1))
      ;;
  esac
done
echo "Last response #: $n" 1>&2
echo "Compression stats:" 1>&2
for i in "${!cec[@]}"; do
  printf " %10s: %s\n" "$i" "${cec[$i]}" 1>&2
done