diff bin/ix.sh @ 88:464d2dfb99c9

new
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 13 Apr 2021 17:02:09 +0000
parents
children 90f8f28b2e51
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/ix.sh	Tue Apr 13 17:02:09 2021 +0000
@@ -0,0 +1,24 @@
+#!/usr/bin/bash
+# Extract records from WARC files given filename, length and offset triples
+#  from stdin or as command line args.
+#
+# Usage: ix.sh FILENAME LENGTH OFFSET
+#        ix.sh < TRIPLES   (one tab-separated FILENAME LENGTH OFFSET per line)
+#
+# For each triple, LENGTH bytes starting at byte OFFSET of FILENAME are copied
+# out with dd; the concatenated ranges are decompressed by unpigz to stdout.
+if [ -n "$1" ]
+then
+    # Command-line mode: turn the three arguments into one tab-separated line
+    printf "%s\t%s\t%s\n" "$1" "$2" "$3"
+else
+    # Stdin mode: pass the triples through unchanged
+    cat
+fi | \
+while { IFS=$'\t' read -r f l o || [ -n "$f" ]; }
+do
+  # read -r (above) keeps backslashes in filenames literal, and the -n test
+  # stops a final line lacking a trailing newline from being dropped.
+  # skip_bytes/count_bytes make skip= and count= byte counts, not block counts.
+  dd if="$f" of=/dev/stdout skip="$o" count="$l" iflag=skip_bytes,count_bytes
+done | unpigz -dp 1 -c