comparison bin/ix.sh @ 89:90f8f28b2e51

working on flags
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 13 Apr 2021 17:52:31 +0000
parents 464d2dfb99c9
children 5384208a0834
comparison
equal deleted inserted replaced
88:464d2dfb99c9 89:90f8f28b2e51
1 #!/usr/bin/bash 1 #!/usr/bin/bash
2 # Extract records from warc files given filename, length and offset triples 2 # Extract records from warc files given filename, length and offset triples
3 # from stdin or as command line args 3 # from stdin or as command line args
4 # Usage: ix.sh [-w] [-h] [-b] [path length offset]
5 # -w WARC headers
6 # -h HTTP headers
7 # -b HTTP body
8 # With no switch, the whole record is output
9 if [ "$1" = "-w" ]
10 then
11 shift
12 p=1
13 w=1
14 fi
15 if [ "$1" = "-h" ]
16 then
17 shift
18 p=1
19 h=1
20 fi
21 if [ "$1" = "-b" ]
22 then
23 shift
24 p=1
25 b=1
26 fi
4 if [ -n "$1" ] 27 if [ -n "$1" ]
5 then 28 then
6 printf "%s\t%s\t%s\n" "$1" "$2" "$3" 29 printf "%s\t%s\t%s\n" "$1" "$2" "$3"
7 else 30 else
8 cat 31 cat
9 fi | \ 32 fi | \
10 while { IFS=$'\t' read f l o; } 33 while { IFS=$'\t' read f l o; }
11 do 34 do
12 dd if="$f" of=/dev/stdout skip=$o count=$l iflag=skip_bytes,count_bytes 35 dd if="$f" of=/dev/stdout skip=$o count=$l \
13 done | unpigz -dp 1 -c 36 iflag=skip_bytes,count_bytes 2>/dev/null
37 done | unpigz -dp 1 -c | \
38 s="w"
39 if [ -n "$p" ]
40 then
41 while read -r L
42 do
43 if [ "$s" = "w" ]
44 then
45 # WARC header
46 if [ "$L" = "
47 " ]
48 then
49 s="h"
50 continue
51 fi
52 if [ -n "$w" ]
53 then
54 printf "%s\n" "${L%%
55 }"
56 fi
57 continue
58 fi
59 if [ "$s" = "b" ]
60 then
61 # HTTP header
62 case "$L" in
63
64 ) s="b" ; n=0 ; continue ;;
65 Content-Length:\ *) bl=${L##*: }
66 bl=${bl%%*([
67 [:space:]])}
68 ;;
69 esac
70 if [ -n "$w" ]
71 then
72 printf "%s\n" "${L%%
73 }"
74 fi
75 continue
76 else
77 # HTTP body
78 if [ -n "$b" ]
79 then
80 printf "%s\n" "$bl" 1>&2
81 head -c "$bl"
82 else
83 break
84 fi
85 fi
86 done
87 else
88 cat
89 fi # No flags, the whole thing