Mercurial > hg > cc > cirrus_home
changeset 54:8154560f1e3d
as used successfully for 3rd run
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 13 Apr 2020 17:29:31 +0100 |
parents | 396d1f17c671 |
children | 50556ac15e88 |
files | bin/preExtract.sh |
diffstat | 1 files changed, 3 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/preExtract.sh Mon Apr 13 15:24:32 2020 +0100
+++ b/bin/preExtract.sh Mon Apr 13 17:29:31 2020 +0100
@@ -56,7 +56,7 @@
 id=${p#CC-MAIN-*}-00$i
 echo " " "$id" >> log
 lf=logs/${jobid}_${i}_log
-lff+=("${lf}") # accumulate list of log files
+lff+=("${lf}") # accumulate list of warc.sh log files
 if [ -s $lf ]
 then
 echo " " $lf not empty, skipping extraction >> log
@@ -84,7 +84,7 @@
 ni=${#ii[@]}
 if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
 then
-if egrep -n '^[1-9]' ${e}_diff.txt | \
+if egrep -n "^[1-9]" ${e}_diff.txt | \
 $HOME/bin/nogood.py $e $ni 2>> log
 then
 continue
@@ -98,4 +98,5 @@
 echo "no diff, no update" $e >> log
 fi
 echo end extract: $e >> log
+
 done '