view bin/doC2S.sh @ 150:4e1364b8c89a

working, with compound driver files
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 26 Oct 2021 14:05:35 +0000
parents f0bee28995f1
children
line wrap: on
line source

#!/usr/bin/bash
# Usage: doC2S.sh node task cc resdir workd i
#
# Runs cdx2sql.py for one index value and loads its tab-separated output into
# the "props" table of a fresh SQLite database.
#
# Arguments:
#   node    label printed in the start/finish progress lines
#   task    label printed in the start/finish progress lines
#   cc      selects the input directory /beegfs/common_crawl/<cc>/cdx/warc
#   resdir  accepted to keep the positional interface; not read by this script
#   workd   directory that receives cdx<i>.db, cdx<i>.errs and cdx<i>.log
#   i       passed to cdx2sql.py and used to name the files in workd
#
# Outputs:
#   stdout            "> node.task: i" before and "< node.task: i" after the run
#   workd/cdx<i>.db   the SQLite database (any previous copy is removed first)
#   workd/cdx<i>.errs stderr of cdx2sql.py
#   workd/cdx<i>.log  stderr of sqlite3
#
# Exit status: 2 if fewer than six arguments are given; otherwise that of the
# final progress line. Pipeline failures are not propagated and have to be
# looked up in the .errs / .log files.
#
# NOTE(review): results/cdx.sql is resolved relative to the current working
# directory, not via resdir -- confirm that this is intended.

# Refuse to run with missing arguments: an empty workd would otherwise make
# the script delete and create files directly under "/".
if (( $# < 6 )); then
  printf 'Usage: %s node task cc resdir workd i\n' "${0##*/}" >&2
  exit 2
fi

node=$1
task=$2
cc=$3
resdir=$4   # currently unused, kept to document the fourth positional argument
workd=$5
i=$6

printf '> %s.%s: %s\n' "$node" "$task" "$i"

# Start from an empty database so the import never appends to an old run.
rm -f -- "$workd/cdx$i.db"

# sponge soaks up all of its input before writing anything out -- presumably
# so that sqlite3 only starts importing once cdx2sql.py has finished.
cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$workd/cdx$i.errs" \
  | /lustre/home/dc007/hst/gentoo/usr/bin/sponge \
  | sqlite3 "$workd/cdx$i.db" \
      '.read results/cdx.sql' \
      '.mode tabs' \
      '.import /dev/stdin props' \
      '.quit' 2>"$workd/cdx$i.log"

printf '< %s.%s: %s\n' "$node" "$task" "$i"