comparison bin/unfold.sh @ 18:6662a353379a

fix a mis-folded link file
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 27 Feb 2020 17:18:02 +0000
parents
children a82c325e8b32
comparison
equal deleted inserted replaced
17:b976a7449d41 18:6662a353379a
#!/bin/bash
# Usage: unfold.sh links_...
#
# Repairs a mis-folded link file: prints one line per record to stdout.
#
# A record starts on a line whose first field is "annot" or "scrape"; its
# second field is the beginning of the value.  Every following line that does
# not start a new record is treated as a continuation, and its first field is
# appended (with no separator) to the current record's value.  The very first
# input line always opens a record, whatever its first field is.
#
# Pass "-" as the file name to read standard input.

#######################################
# Unfold one link file.
# Arguments: $1 - path of the folded link file ("-" for standard input)
# Outputs:   unfolded records on stdout, one per line
# Returns:   awk's exit status
#######################################
unfold() {
  # NOTE(review): the three-operand print yields "<key> <TAB> <value>", i.e.
  # a space on each side of the tab, because awk joins print operands with
  # OFS (a space by default).  Kept as-is so existing consumers see the same
  # bytes; confirm whether a bare tab was intended.
  awk '
    # First line: open the first record unconditionally.
    NR == 1 { key = $1; value = $2; next }

    # A new record begins: flush the one collected so far, then start over.
    $1 == "annot" || $1 == "scrape" {
      print key, "\t", value
      key = $1
      value = $2
      next
    }

    # Continuation line: glue its first field onto the pending value.
    { value = value $1 }

    # Flush the last record, but only if there was any input at all;
    # otherwise an empty file would produce a bogus empty record.
    END { if (NR > 0) print key, "\t", value }
  ' "$1"
}

#######################################
# Entry point: validate the command line and unfold the named file.
# Arguments: $1 - link file to unfold (further arguments are ignored)
# Returns:   2 if no file was given, otherwise the status of unfold
#######################################
main() {
  if [[ $# -eq 0 ]]; then
    printf 'Usage: %s links_...\n' "${0##*/}" >&2
    return 2
  fi
  unfold "$1"
}

main "$@"