Mercurial > hg > cc > cirrus_home
comparison bin/ix.sh @ 89:90f8f28b2e51
working on flags
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 13 Apr 2021 17:52:31 +0000 |
parents | 464d2dfb99c9 |
children | 5384208a0834 |
comparison
equal
deleted
inserted
replaced
88:464d2dfb99c9 | 89:90f8f28b2e51 |
---|---|
1 #!/usr/bin/bash | 1 #!/usr/bin/bash |
2 # Extract records from warc files given filename, length and offset triples | 2 # Extract records from warc files given filename, length and offset triples |
3 # from stdin or as command line args | 3 # from stdin or as command line args |
4 # Usage [-w] [-h] [-b] [path length offset] | |
5 # -w WARC headers | |
6 # -h HTTP headers | |
7 # -b HTTP body | |
8 # No switch defaults to whole record | |
9 if [ "$1" = "-w" ] | |
10 then | |
11 shift | |
12 p=1 | |
13 w=1 | |
14 fi | |
15 if [ "$1" = "-h" ] | |
16 then | |
17 shift | |
18 p=1 | |
19 h=1 | |
20 fi | |
21 if [ "$1" = "-b" ] | |
22 then | |
23 shift | |
24 p=1 | |
25 b=1 | |
26 fi | |
4 if [ -n "$1" ] | 27 if [ -n "$1" ] |
5 then | 28 then |
6 printf "%s\t%s\t%s\n" "$1" "$2" "$3" | 29 printf "%s\t%s\t%s\n" "$1" "$2" "$3" |
7 else | 30 else |
8 cat | 31 cat |
9 fi | \ | 32 fi | \ |
10 while { IFS=$'\t' read f l o; } | 33 while { IFS=$'\t' read f l o; } |
11 do | 34 do |
12 dd if="$f" of=/dev/stdout skip=$o count=$l iflag=skip_bytes,count_bytes | 35 dd if="$f" of=/dev/stdout skip=$o count=$l \ |
13 done | unpigz -dp 1 -c | 36 iflag=skip_bytes,count_bytes 2>/dev/null |
37 done | unpigz -dp 1 -c | \ | |
38 s="w" | |
39 if [ -n "$p" ] | |
40 then | |
41 while read -r L | |
42 do | |
43 if [ "$s" = "w" ] | |
44 then | |
45 # WARC header | |
46 if [ "$L" = " | |
47 " ] | |
48 then | |
49 s="h" | |
50 continue | |
51 fi | |
52 if [ -n "$w" ] | |
53 then | |
54 printf "%s\n" "${L%% | |
55 }" | |
56 fi | |
57 continue | |
58 fi | |
59 if [ "$s" = "b" ] | |
60 then | |
61 # HTTP header | |
62 case "$L" in | |
63 | |
64 ) s="b" ; n=0 ; continue ;; | |
65 Content-Length:\ *) bl=${L##*: } | |
66 bl=${bl%%*([ | |
67 [:space:]])} | |
68 ;; | |
69 esac | |
70 if [ -n "$w" ] | |
71 then | |
72 printf "%s\n" "${L%% | |
73 }" | |
74 fi | |
75 continue | |
76 else | |
77 # HTTP body | |
78 if [ -n "$b" ] | |
79 then | |
80 printf "%s\n" "$bl" 1>&2 | |
81 head -c "$bl" | |
82 else | |
83 break | |
84 fi | |
85 fi | |
86 done | |
87 else | |
88 cat | |
89 fi # No flags, the whole thing |