Mercurial > hg > cc > cirrus_home
changeset 46:2e5b3439a2ed
start try to work around failures
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 08 Apr 2020 14:11:04 +0100 |
parents | bd0010ac88ce |
children | 81ff28478276 |
files | bin/preExtract.sh |
diffstat | 1 files changed, 31 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/preExtract.sh Wed Apr 08 11:27:33 2020 +0100
+++ b/bin/preExtract.sh Wed Apr 08 14:11:04 2020 +0100
@@ -55,9 +55,9 @@
 echo "$id" 1>&2
 lf=logs/${jobid}_${i}_log
 lff+=("${lf}") # accumulate list of log files
- echo starting ${id} $(date) > $lf
- unpigz -dp 1 -c ${h}/${s}/CC-MAIN-${id}.warc.gz | $HOME/lib/valhalla/bin/warc.sh ${id} application/pdf 2>> $lf
- echo finished ${id} $(date) >> $lf
+ #echo starting ${id} $(date) > $lf
+ #unpigz -dp 1 -c ${h}/${s}/CC-MAIN-${id}.warc.gz | $HOME/lib/valhalla/bin/warc.sh ${id} application/pdf 2>> $lf
+ #echo finished ${id} $(date) >> $lf
 ls -l ${id}_* | tr -s " " "\t" |cut -f 5,9 >> $lsf
 ii+=("*-00${i}_*")
 echo "$i" "${ii[@]}" ${#ii[@]} 1>&2
@@ -68,23 +68,33 @@
 tar -tvf ${h}/${s}/extract_${e}.tar "${ii[@]}" | \
 tr -s " " "\t" |cut -f 3,6 |sort -k2.1,2.36 -k2.38,2n | diff -bw \
 - <(sort -k2.1,2.36 -k2.38,2n $lsf) > ${e}_diff.txt
- cut -f 1 -d " " ${e}_diff.txt | sus > ${e}_check.txt
- ni=${#ii[@]}
- if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
- then
- echo "extra lines in ${e}_check.txt" 1>&2
- cd ..
- break
- fi
- if [ $(fgrep -c a ${e}_check.txt) -ne $ni ]
- then
- echo "non-addition lines in ${e}_check.txt" 1>&2
- cd ..
- break
- fi
- egrep "^> " ${e}_diff.txt | cut -f 2 > ${e}_new.txt
- tar --delete -f ${h}/${s}/extract_${e}.tar "${lff[@]}"
- tar --append -f ${h}/${s}/extract_${e}.tar --files-from=${e}_new.txt "${lff[@]}"
- done
+ if [ -s ${e}_diff.txt ]
+ then
+ cut -f 1 -d " " ${e}_diff.txt | sus > ${e}_check.txt
+ ni=${#ii[@]}
+ if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
+ then
+ if [[ "$(tr -s '\n\t ' ' ' < ${e}_check.txt)" =~ \
+ ^' '[0-9]*' > 1 < 1 --- 1 '[0-9]*c[0-9]*' 1 '[0-9]*'a'[0-9,]*' '$ ]]
+ then
+ :
+ else
+ echo "extra lines in ${e}_check.txt" 1>&2
+ cd ..
+ continue
+ fi
+ elif [ $(fgrep -c a ${e}_check.txt) -ne $ni ]
+ then
+ echo "non-addition lines in ${e}_check.txt" 1>&2
+ cd ..
+ continue
+ fi
+ egrep "^> " ${e}_diff.txt | cut -f 2 > ${e}_new.txt
+ tar --delete -f ${h}/${s}/extract_${e}.tar "${lff[@]}"
+ tar --append -f ${h}/${s}/extract_${e}.tar --files-from=${e}_new.txt "${lff[@]}"
+ else
+ echo "no diff, no update" $e
+ fi
+ done
 cd ..
 '