changeset 54:8154560f1e3d

as used successfully for 3rd run
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 13 Apr 2020 17:29:31 +0100
parents 396d1f17c671
children 50556ac15e88
files bin/preExtract.sh
diffstat 1 files changed, 3 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/bin/preExtract.sh	Mon Apr 13 15:24:32 2020 +0100
+++ b/bin/preExtract.sh	Mon Apr 13 17:29:31 2020 +0100
@@ -56,7 +56,7 @@
 	    id=${p#CC-MAIN-*}-00$i
 	    echo " " "$id" >> log
 	    lf=logs/${jobid}_${i}_log
-	    lff+=("${lf}") # accumulate list of log files
+	    lff+=("${lf}") # accumulate list of warc.sh log files
             if [ -s $lf ]
             then
               echo "  " $lf not empty, skipping extraction >> log
@@ -84,7 +84,7 @@
 	  ni=${#ii[@]}
 	  if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
 	  then
-            if egrep -n '^[1-9]' ${e}_diff.txt | \
+            if egrep -n "^[1-9]" ${e}_diff.txt | \
                   $HOME/bin/nogood.py $e $ni 2>> log
             then
               continue
@@ -98,4 +98,5 @@
           echo "no diff, no update" $e >> log
         fi
         echo end extract: $e >> log
+     done   
 '