view bin/unfold.sh @ 166:afd7879181c9

old style
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:15:20 +0100
parents a82c325e8b32
children
line wrap: on
line source

#!/bin/bash
# Usage: unfold.sh links_...
#
# Re-joins ("unfolds") records that have been wrapped over several lines.
#
# A record starts on the first input line, or on any later line whose first
# whitespace-delimited field is exactly "annot" or "scrape".  The first field
# of a starting line is the record's tag and its second field is the start of
# the record's value.  On every other line the first field is appended,
# with no separator, to the value of the record currently being collected.
#
# Each completed record is written to stdout as:  <tag> SPACE TAB SPACE <value>
# (awk's `print a, "\t", b` puts the default OFS, a single space, on both
# sides of the tab; that format is kept as is for existing consumers).
#
# Input is read from the file named by $1, or from stdin when no (or an
# empty) argument is given.  Empty input produces no output.
#
# NOTE(review): fields are split on awk's default whitespace FS, so a value
# containing a space is cut at that space.  If the links files are strictly
# tab-separated, `-F '\t'` may be what is wanted -- confirm against the data.

set -euo pipefail

#######################################
# Unfold one links file (or stdin) onto stdout.
# Arguments: $1 - path of the file to read; optional, stdin when absent/empty
# Outputs:   one line per record on stdout
# Returns:   awk's exit status
#######################################
unfold_links() {
  local f=${1-}

  # ${f:+"$f"} passes the path as exactly one word when it is set and
  # drops the operand entirely otherwise, so awk falls back to stdin.
  awk '
    # Start of a record: flush the previous one (if any), then begin anew.
    NR == 1 || $1 == "annot" || $1 == "scrape" {
      if (NR > 1) print o, "\t", u
      o = $1
      u = $2
      next
    }
    # Continuation line: glue its first field onto the current value.
    { u = u $1 }
    # Flush the last record; NR is 0 only when there was no input at all.
    END { if (NR > 0) print o, "\t", u }
  ' ${f:+"$f"}
}

main() {
  unfold_links "$@"
}

main "$@"