view bin/unfold.sh @ 18:6662a353379a

fix a mis-folded link file
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 27 Feb 2020 17:18:02 +0000
parents
children a82c325e8b32
line wrap: on
line source

#!/bin/bash
# Usage: unfold.sh links_...
#
# Repair a mis-folded link file: rejoin records that were broken across
# several lines and write one record per line to stdout.
#
# Input is split into whitespace-separated fields (awk default).
#   * The first line, and every later line whose first field is "annot"
#     or "scrape", starts a new record: field 1 is the tag, field 2 is
#     the start of the value (presumably a URL -- confirm against the data).
#   * Any other line is a continuation: its first field is appended,
#     with no separator, to the value of the current record.
# Each record is emitted as  tag<TAB>value.
#
# Pass "-" as the file name to read from standard input.
# Exit status: 2 if no file name is given, otherwise that of awk.

#######################################
# Unfold one link file.
# Arguments: $1 - path of the file to read ("-" for stdin)
# Outputs:   the unfolded records, tab-separated, on stdout
# Returns:   exit status of awk
#######################################
unfold() {
  awk '
    # Start of a record: flush the previous one (if any), then remember
    # the tag and the first piece of the value.
    NR == 1 || $1 == "annot" || $1 == "scrape" {
      if (NR > 1) printf "%s\t%s\n", tag, val
      tag = $1
      val = $2
      next
    }
    # Continuation line: glue its first field onto the current value.
    { val = val $1 }
    # Flush the final record, but print nothing for empty input.
    END { if (NR > 0) printf "%s\t%s\n", tag, val }
  ' "$1"
}

#######################################
# Entry point: validate the command line and unfold the given file.
# Arguments: $1 - path of the link file
# Outputs:   unfolded records on stdout, usage message on stderr
# Returns:   2 on missing argument, otherwise the status of unfold
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s links_...\n' "${0##*/}" >&2
    return 2
  fi
  unfold "$1"
}

main "$@"