# HG changeset patch # User Henry S. Thompson # Date 1584529727 0 # Node ID a82c325e8b32ba1920f067495b210b3397f46a96 # Parent 6fdebccf6492e0e002b01628f9b5393af675fe34 diff -r 6fdebccf6492 -r a82c325e8b32 bin/extract.sh --- a/bin/extract.sh Wed Mar 18 11:08:23 2020 +0000 +++ b/bin/extract.sh Wed Mar 18 11:08:47 2020 +0000 @@ -1,11 +1,13 @@ #!/bin/bash -me=$$ -SHARED=/home/shared/ht -cd $SHARED/data/$(hostname) +# Usage extract.sh ccid segid +ccid=$1 +segid=$2 +@@me=$$ +@@cd $SHARED/data/$(hostname) mkdir -p logs while read id do echo starting ${id} $(date) >> logs/${me}_log - unpigz -dp 1 -c /data/common_crawl/CC-MAIN-2019-35/CC-MAIN-${id}.warc.gz|$SHARED/bin/warc.sh ${id} application/pdf 2>> logs/${me}_log - echo finished ${id} $(date) >> logs/${me}_log + unpigz -dp 1 -c /beegfs/common-crawl/CC-MAIN-${ccid}/${segid}/CC-MAIN-${id}.warc.gz|$HOME/lib/valhalla/bin/warc.sh ${id} application/pdf 2>> logs/@@${me}_log + echo finished ${id} $(date) >> logs/@@${me}_log done diff -r 6fdebccf6492 -r a82c325e8b32 bin/test.sh --- a/bin/test.sh Wed Mar 18 11:08:23 2020 +0000 +++ b/bin/test.sh Wed Mar 18 11:08:47 2020 +0000 @@ -1,5 +1,5 @@ -#!/bin/bash -pwd -parallel --will-cite --nonall -S r1i5n0 -S r1i5n1 'echo $$ $(hostname); n=$(echo $(hostname)|cut -c 6); nohup sleep $((n*5)); echo done $n' -echo pdone - +#!/usr/bin/bash +# test master +echo $(date) $(hostname) "$1" +echo testing... $(cat test_$1.txt) +echo $(date) $(hostname) $? diff -r 6fdebccf6492 -r a82c325e8b32 bin/unfold.sh --- a/bin/unfold.sh Wed Mar 18 11:08:23 2020 +0000 +++ b/bin/unfold.sh Wed Mar 18 11:08:47 2020 +0000 @@ -3,7 +3,7 @@ pdfs=/beegfs/common_crawl/CC-MAIN-2019-35/pdfs links=$pdfs/links f=$1 -IFS=' ' +IFS=' ' awk '{if (NR==1) { o=$1 ; u=$2 } else { if ($1=="annot" || $1=="scrape") { diff -r 6fdebccf6492 -r a82c325e8b32 testJob.sh --- a/testJob.sh Wed Mar 18 11:08:23 2020 +0000 +++ b/testJob.sh Wed Mar 18 11:08:47 2020 +0000 @@ -4,11 +4,11 @@ #PBS -l walltime=08:00:00 #PBS -V #PBS -A dc007 -#PBS -N plinks +#PBS -N test #module load mpt cd ${PBS_O_WORKDIR} -bin/test.sh +bin/dummy.sh