view bin/doC2S.sh @ 174:bfe9085a1d39

change account back
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 10 Jan 2023 17:48:26 +0000
parents 4e1364b8c89a
children
line wrap: on
line source

#!/usr/bin/bash
# Usage: doC2S.sh node task cc resdir workd i
#
# Build the SQLite database $workd/cdx$i.db from the output of cdx2sql.py.
#
# Arguments:
#   node, task  labels used only in the "> ..." / "< ..." progress markers
#   cc          crawl name; input is read from /beegfs/common_crawl/$cc/cdx/warc
#   resdir      accepted for interface compatibility; not used by this script
#   workd       directory receiving cdx$i.db, cdx$i.errs and cdx$i.log
#   i           segment identifier passed to cdx2sql.py and used in file names
#
# Outputs:
#   stdout: a start marker and an end marker
#   stderr: a usage message, or a warning if any pipeline stage failed
#
# Note: 'results/cdx.sql' is resolved relative to the current directory.
# The exit status on a valid call is unchanged from earlier versions
# (pipeline failures are reported on stderr but do not change it).

if (( $# < 6 )); then
  # Without this guard, empty values would yield paths such as /cdx.db.
  printf 'Usage: %s node task cc resdir workd i\n' "${0##*/}" >&2
  exit 2
fi

node=$1
task=$2
cc=$3
# shellcheck disable=SC2034  # kept to document the positional interface
resdir=$4
workd=$5
i=$6

db="$workd/cdx$i.db"
errs="$workd/cdx$i.errs"
log="$workd/cdx$i.log"

echo "> $node.$task: $i"

# Start from a clean slate so the import never appends to an old database.
rm -f -- "$db"

# sponge (moreutils) soaks up all of its input before writing it out;
# presumably used here so sqlite3 receives the data in one go after
# cdx2sql.py has finished -- TODO confirm the original motivation.
cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$errs" |
  /lustre/home/dc007/hst/gentoo/usr/bin/sponge |
  sqlite3 "$db" '.read results/cdx.sql' '.mode tabs' \
    '.import /dev/stdin props' '.quit' 2>"$log"
# PIPESTATUS is overwritten by the next command, so capture it immediately.
stage_status=("${PIPESTATUS[@]}")

for rc in "${stage_status[@]}"; do
  if (( rc != 0 )); then
    printf '! %s.%s: %s: pipeline stage failed (exit codes: %s); see %s and %s\n' \
      "$node" "$task" "$i" "${stage_status[*]}" "$errs" "$log" >&2
    break
  fi
done

echo "< $node.$task: $i"