Mercurial > hg > cc > cirrus_home
diff bin/unfold.sh @ 18:6662a353379a
fix a mis-folded link file
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 27 Feb 2020 17:18:02 +0000 |
parents | |
children | a82c325e8b32 |
line wrap: on
line diff
#!/bin/bash
# Usage: unfold.sh links_...
#
# Repair a "mis-folded" links file: a record whose URL was wrapped onto one
# or more following lines is joined back into a single output line.
#
# Input  : a links file whose records start with a first field of "annot" or
#          "scrape", followed by a URL in the second field.  Any other line
#          is taken to be the continuation of the previous record's URL.
#          The very first line always starts a record, whatever it holds.
# Output : one line per record on stdout, as  <field1> <TAB> <url>  (the
#          separator is space, tab, space -- kept exactly as this script has
#          always emitted it, since downstream consumers may rely on it).
# Status : 2 (with a usage message on stderr) when no file is named,
#          otherwise awk's exit status.
#
# NOTE(review): the links files presumably live under
# /beegfs/common_crawl/CC-MAIN-2019-35/pdfs/links -- earlier revisions set
# that path in variables but never used it; confirm before relying on it.

# Print a one-line usage message on stderr and report status 2.
usage() {
  printf 'Usage: %s links_file\n' "${0##*/}" >&2
  return 2
}

# Unfold the links file named by $1, writing the repaired records to stdout.
unfold() {
  awk '
    # The first line opens the first record unconditionally.
    NR == 1 { kind = $1; url = $2; next }

    # A new record begins: flush the one collected so far, then start over.
    $1 == "annot" || $1 == "scrape" {
      print kind, "\t", url
      kind = $1
      url = $2
      next
    }

    # Anything else is a wrapped piece of the current URL.  Only the first
    # field is appended, so whitespace around the fragment is dropped.
    { url = url $1 }

    # Flush the last record -- but emit nothing at all for empty input.
    END { if (NR > 0) print kind, "\t", url }
  ' "$1"   # quoted: the path may contain spaces or glob characters
}

# The file operand is required; without it, report usage instead of
# silently waiting on stdin.
if (( $# > 0 )); then
  unfold "$1"
else
  usage
fi