# HG changeset patch # User Henry S. Thompson # Date 1618837696 0 # Node ID 0332076afc370f84a17428edc10e022eafdbc39b # Parent fbca56fabbaca61bb964f98acc383a2a6b90c4fc better dd error handling diff -r fbca56fabbac -r 0332076afc37 bin/ix.sh --- a/bin/ix.sh Mon Apr 19 13:07:58 2021 +0000 +++ b/bin/ix.sh Mon Apr 19 13:08:16 2021 +0000 @@ -12,6 +12,7 @@ if [ "$1" = "-d" ] then d=1 + rm /tmp/ix_dd_log.txt /tmp/ix_triples.tsv shift fi if [ "$1" = "-w" ] @@ -44,7 +45,7 @@ # get triples from index lines egrep -ao 'length": "[0-9]*", "offset": "[0-9]*".*\.gz'| \ sed 's/[a-z]*": "//g;s/", "/\t/g;s/\(crawl-data\|segments\|warc\)\///g' |\ - if [ "$d" ] ; then tee /tmp/ix_triples.tsv ; else cat ; fi + if [ "$d" ] ; then tee -a /tmp/ix_triples.tsv ; else cat ; fi elif [ "$1" ] then printf "%s\t%s\t%s\n" "$1" "$2" "$3" @@ -56,14 +57,14 @@ if [ -z "$d" ] then dd if="$f" of=/dev/stdout skip=$o count=$l \ - iflag=skip_bytes,count_bytes 2>/dev/null + iflag=skip_bytes,count_bytes status=none else echo dd if="$f" of=/dev/stdout skip=$o count=$l \ - iflag=skip_bytes,count_bytes > /tmp/ix_dd_log.txt + iflag=skip_bytes,count_bytes >> /tmp/ix_dd_log.txt dd if="$f" of=/dev/stdout skip=$o count=$l \ iflag=skip_bytes,count_bytes 2>> /tmp/ix_dd_log.txt fi | \ -unpigz -dp 1 -c | tee /tmp/data | \ +{ unpigz -dp 1 -c || { printf "dd failure?: %s %s %s\n" $f $o $l 1>&2 ; continue ; } } |\ { s="w" if [ "$p" ] then