# HG changeset patch
# User Henry S. Thompson
# Date 1635174346 0
# Node ID f0bee28995f162b88694f512f815ffbd2aa353eb
# Parent 11d973ecff4efb16da91b71290220e3b015d13a0
do the work for cdx2sql

diff -r 11d973ecff4e -r f0bee28995f1 bin/doC2S.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/doC2S.sh Mon Oct 25 15:05:46 2021 +0000
@@ -0,0 +1,41 @@
+#!/usr/bin/bash
+# Usage: doC2S.sh node task cc resdir workd i
+#
+# Runs cdx2sql.py on /beegfs/common_crawl/$cc/cdx/warc with argument $i and
+# pipes its stdout into sqlite3, which reads results/cdx.sql and then imports
+# the tab-separated stream into table "props" of $workd/cdx$i.db.
+# stderr of cdx2sql.py goes to $workd/cdx$i.errs, stderr of sqlite3 to
+# $workd/cdx$i.log.  node and task only label the "> ..."/"< ..." progress
+# lines; resdir is accepted but not used.  results/cdx.sql is resolved
+# relative to the current directory.
+#
+# Exit status: 2 if fewer than six arguments are given, otherwise the status
+# of the pipeline (non-zero if either stage failed).
+
+set -o pipefail
+
+if (( $# < 6 )); then
+  echo "Usage: doC2S.sh node task cc resdir workd i" >&2
+  exit 2
+fi
+
+node=$1
+task=$2
+cc=$3
+# shellcheck disable=SC2034  # unused, kept for the documented interface
+resdir=$4
+workd=${5:?workd must not be empty}
+i=$6
+
+db=$workd/cdx$i.db
+
+echo "> $node.$task: $i"
+# Remove any database left by an earlier run before importing.
+rm -f -- "$db"
+status=0
+cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$workd/cdx$i.errs" |
+  sqlite3 "$db" '.read results/cdx.sql' '.mode tabs' \
+    '.import /dev/stdin props' '.quit' 2>"$workd/cdx$i.log" || status=$?
+# Always emit the end marker, then report failure through the exit status.
+echo "< $node.$task: $i"
+exit "$status"