# HG changeset patch
# User Henry S. Thompson
# Date 1586787872 -3600
# Node ID 396d1f17c67109af09eeed49c12e8c9e7a9ab926
# Parent 9cd9daf7518303dc94db804768759d97efc23178
ready to try another pass with robust diff checking

diff -r 9cd9daf75183 -r 396d1f17c671 bin/nogood.py
--- a/bin/nogood.py Mon Apr 13 14:12:12 2020 +0100
+++ b/bin/nogood.py Mon Apr 13 15:24:32 2020 +0100
@@ -3,7 +3,12 @@
 Usage: nogood.py segid numChanged
 Note the slightly counter-intuitive value: 0 if the input is no good'''
 import sys,re
-LONG_ADD=re.compile('[1-9][0-9]*a[1-9][0-9]*,[1-9][0-9]*')
+shortPat='[1-9][0-9]*%s[1-9][0-9]*'
+SHORT_ADD=re.compile(shortPat%'a')
+SHORT_CHANGE=re.compile(shortPat%'c')
+longPat=shortPat+',[1-9][0-9]*'
+LONG_ADD=re.compile(longPat%'a')
+LONG_CHANGE=re.compile(longPat%'c')
 (segment,n)=map(int,sys.argv[1:])
 c=0
 for l in sys.stdin:
@@ -11,10 +16,35 @@
   if LONG_ADD.fullmatch(d):
     c+=1
     continue
-  else:
-    print("non-simple diff at %s: %s for segment %s"%(i,d,segment),file=sys.stderr)
-    exit(0)
+  elif SHORT_ADD.fullmatch(d):
+    # as in 1566027315865.44/40_diff.txt
+    try:
+      (j,e)=sys.stdin.readline().rstrip().split(':')
+    except ValueError:
+      # EOF, I think
+      # OK, maybe
+      c+=1
+      continue
+    if int(j)==int(i)+2 and LONG_CHANGE.fullmatch(e):
+      c+=1
+      continue
+  elif SHORT_CHANGE.fullmatch(d):
+    # as in 1566027315865.44/40_diff.txt
+    try:
+      (j,e)=sys.stdin.readline().rstrip().split(':')
+    except ValueError:
+      # EOF, I think
+      # OK, maybe
+      c+=1
+      continue
+    if int(j)==int(i)+4 and LONG_ADD.fullmatch(e):
+      c+=1
+      continue
+  print("can't handle diff at %s: %s for segment %s"%(i,d,segment),
+        file=sys.stderr)
+  exit(0)
 if c!=n:
-  print("too many diffs? (%s<>%s) for segment %s"%(c,n,segment),file=sys.stderr)
+  print("too many diffs? (%s<>%s) for segment %s"%(c,n,segment),
+        file=sys.stderr)
   exit(0)
 exit(1)
diff -r 9cd9daf75183 -r 396d1f17c671 bin/preExtract.sh
--- a/bin/preExtract.sh Mon Apr 13 14:12:12 2020 +0100
+++ b/bin/preExtract.sh Mon Apr 13 15:24:32 2020 +0100
@@ -84,7 +84,8 @@
   ni=${#ii[@]}
   if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
   then
-    if egrep -n '^[1-9]' ${e}_diff.txt | $HOME/bin/nogood.py $e $ni
+    if egrep -n '^[1-9]' ${e}_diff.txt | \
+            $HOME/bin/nogood.py $e $ni 2>> log
     then
       continue
     fi