#!/bin/bash
# bin/unfold.sh -- fix a mis-folded link file.
#
# Provenance (Mercurial changeset that added this script):
#   User:   Henry S. Thompson
#   Date:   1582823882 0  (Thu Feb 27 17:18:02 2020 +0000)
#   Node:   6662a353379a150ee2afe5a917cfea47e2d51e2c
#   Parent: b976a7449d41addd730034dcc9b61cd8ad248e10
#
# Usage: unfold.sh links_...
#
# Reads the named file (or standard input when no file name is given) and
# re-joins records that were broken across several lines:
#   * the first input line always opens a record;
#   * any later line whose first whitespace-separated field is "annot" or
#     "scrape" closes the current record and opens a new one;
#   * every other line is a continuation: its FIRST field is appended,
#     with no separator, to the current record's second field.
# Each completed record is written to stdout as: field1 SPACE TAB SPACE field2.
# Exit status is awk's (or 2 when invoked with no input at all).

#######################################
# Print a one-line usage message.
# Outputs: usage text on stdout
#######################################
usage() {
  printf 'Usage: %s links_...\n' "${0##*/}"
}

#######################################
# Unfold one link file.
# Arguments: $1 - path of the file to unfold; when missing or empty,
#                 standard input is read instead
# Outputs:   one line per re-joined record on stdout
# Returns:   awk's exit status
#######################################
unfold() {
  local -a input=()
  # Pass the path as exactly one word, so names containing spaces or glob
  # characters are not split or expanded.
  if [[ -n "${1:-}" ]]; then
    input=("$1")
  fi

  # NOTE(review): `print a, "\t", b` joins its arguments with OFS (a space),
  # so the separator actually emitted is SPACE TAB SPACE. Kept as-is because
  # downstream consumers may rely on it -- confirm before tightening.
  # NOTE(review): only $1 of a continuation line is appended; anything after
  # whitespace on such a line is dropped -- presumably links never contain
  # whitespace; verify against the data.
  awk '
    NR == 1 { tag = $1; rest = $2; next }

    $1 == "annot" || $1 == "scrape" {
      print tag, "\t", rest
      tag = $1
      rest = $2
      next
    }

    { rest = rest $1 }

    # Only flush when something was read; empty input yields no output.
    END { if (NR > 0) print tag, "\t", rest }
  ' "${input[@]}"
}

#######################################
# Entry point: refuse to block on an interactive terminal, else unfold.
# Arguments: same as the script
# Returns:   2 on missing input, otherwise the status of unfold
#######################################
main() {
  if [[ -z "${1:-}" && -t 0 ]]; then
    usage >&2
    return 2
  fi
  unfold "$@"
}

main "$@"