annotate bin/doC2S.sh @ 161:df56132ef84a

x
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 17:59:43 +0100
parents 4e1364b8c89a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
148
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/bash
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 # Usage: doC2S.sh node task cc resdir workd i
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 node="$1"    # 1st argument: echoed in the start/end progress lines
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 task="$2"    # 2nd argument: echoed next to node in the start/end progress lines
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 cc="$3"      # 3rd argument: directory name under /beegfs/common_crawl/ whose cdx/warc path is passed to cdx2sql.py
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 resdir="$4"  # 4th argument: not referenced again in this script -- NOTE(review): sqlite3 reads the relative path results/cdx.sql; confirm whether $resdir was intended there
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 workd="$5"   # 5th argument: directory that receives cdx$i.db, cdx$i.errs and cdx$i.log
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 i="$6"       # 6th argument: passed to cdx2sql.py and embedded in the output file names
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 printf '> %s.%s: %s\n' "$node" "$task" "$i"  # announce the start of this work item
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 rm -f -- "$workd/cdx$i.db"  # remove any previous database first; quoted so the path stays a single word
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$workd/cdx$i.errs" | \
150
4e1364b8c89a working, with compound driver files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 148
diff changeset
13 /lustre/home/dc007/hst/gentoo/usr/bin/sponge | \
4e1364b8c89a working, with compound driver files
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 148
diff changeset
14 sqlite3 "$workd/cdx$i.db" '.read results/cdx.sql' '.mode tabs' '.import /dev/stdin props' '.quit' 2>"$workd/cdx$i.log"  # runs results/cdx.sql, then imports tab-separated stdin into table props; stderr goes to the .log file
148
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 printf '< %s.%s: %s\n' "$node" "$task" "$i"  # announce completion; printed whatever the pipeline's exit status
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16
f0bee28995f1 do the work for cdx2sql
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17