Mercurial > hg > cc > cirrus_home
changeset 148:f0bee28995f1
do the work for cdx2sql
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 25 Oct 2021 15:05:46 +0000 |
parents | 11d973ecff4e |
children | bb24f94fe592 |
files | bin/doC2S.sh |
diffstat | 1 files changed, 16 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/bash
# doC2S.sh -- pipe the output of cdx2sql.py for one index into a fresh
# SQLite database.
#
# Usage: doC2S.sh node task cc resdir workd i
#   node, task  only used to label the "> ..." / "< ..." progress lines
#   cc          crawl name; input path is /beegfs/common_crawl/$cc/cdx/warc
#   resdir      accepted to keep the positional interface; not referenced
#   workd       directory that receives cdx$i.db, cdx$i.errs and cdx$i.log
#   i           index handed to cdx2sql.py and used in the output file names
#
# Outputs:
#   stdout             one "> node.task: i" line before and one
#                      "< node.task: i" line after the work
#   $workd/cdx$i.db    database, removed and rebuilt on every run
#   $workd/cdx$i.errs  stderr of cdx2sql.py
#   $workd/cdx$i.log   stderr of sqlite3
#
# NOTE(review): results/cdx.sql is a relative path, so the script has to be
# started from a directory that contains results/. Possibly $resdir was
# meant here -- confirm against the caller before changing it.
# NOTE(review): failures of cdx2sql.py or sqlite3 only show up in the
# .errs/.log files; the exit status is that of the final echo.

# Refuse to run with missing or empty workd/i: the paths below would
# otherwise collapse to /cdx.db and friends.
if (( $# < 6 )) || [[ -z $5 || -z $6 ]]; then
  printf 'Usage: %s node task cc resdir workd i\n' "${0##*/}" >&2
  exit 2
fi

node=$1
task=$2
cc=$3
# $4 (resdir) is intentionally not captured: nothing below reads it.
workd=$5
i=$6

echo "> $node.$task: $i"

# Start from an empty database so rows from an earlier run are not kept.
rm -f -- "$workd/cdx$i.db"

# sqlite3 runs the dot-commands in order: read results/cdx.sql (presumably
# the schema that creates table props -- confirm), switch to tab-separated
# mode, then import the piped cdx2sql.py output into table props.
cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$workd/cdx$i.errs" |
  sqlite3 "$workd/cdx$i.db" \
    '.read results/cdx.sql' \
    '.mode tabs' \
    '.import /dev/stdin props' \
    '.quit' 2>"$workd/cdx$i.log"

echo "< $node.$task: $i"