changeset 148:f0bee28995f1

do the work for cdx2sql
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 25 Oct 2021 15:05:46 +0000
parents 11d973ecff4e
children bb24f94fe592
files bin/doC2S.sh
diffstat 1 files changed, 16 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/doC2S.sh	Mon Oct 25 15:05:46 2021 +0000
@@ -0,0 +1,16 @@
+#!/usr/bin/bash
+# Usage: doC2S.sh node task cc resdir workd i -- pipes cdx2sql.py output for i into SQLite table props of $workd/cdx$i.db
+node=$1    # label, used only in the progress markers echoed below
+task=$2    # label, used only in the progress markers echoed below
+cc=$3      # path component selecting /beegfs/common_crawl/$cc/cdx/warc
+resdir=$4  # accepted to keep the positional interface; not used below
+workd=${5:?missing workd argument}  # directory for cdx$i.db, cdx$i.errs, cdx$i.log; abort rather than write under /
+i=$6       # passed to cdx2sql.py and embedded in the output file names
+# All expansions are quoted so values with spaces/globs cannot split arguments or break redirections.
+echo "> $node.$task: $i"
+rm -f -- "$workd/cdx$i.db"  # remove any earlier database so the import starts from scratch
+cdx2sql.py "/beegfs/common_crawl/$cc/cdx/warc" "$i" 2>"$workd/cdx$i.errs" | \
+sqlite3 "$workd/cdx$i.db" '.read results/cdx.sql' '.mode tabs' '.import /dev/stdin props' '.quit' 2>"$workd/cdx$i.log"
+echo "< $node.$task: $i"
+
+