annotate bin/unfold.sh @ 194:1845222b3d73

move DummyContext out
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 25 Sep 2024 17:45:52 +0100
parents a82c325e8b32
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/bin/bash
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 # Usage: unfold.sh links_...
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 pdfs=/beegfs/common_crawl/CC-MAIN-2019-35/pdfs
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 links=$pdfs/links
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 f=$1
25
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 18
diff changeset
6 IFS=' '
18
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 awk '{if (NR==1) { o=$1 ; u=$2 }
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 else
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 { if ($1=="annot" || $1=="scrape") {
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 print o,"\t",u
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 o=$1
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 u=$2
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 }
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 else {
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 u=u $1
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 }
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 }
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 }
6662a353379a fix a mis-folded link file
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 END { print o,"\t",u }' $f