Mercurial > hg > cc > cirrus_home
changeset 53:396d1f17c671
ready to try another pass with robust diff checking
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 13 Apr 2020 15:24:32 +0100 |
parents | 9cd9daf75183 |
children | 8154560f1e3d |
files | bin/nogood.py bin/preExtract.sh |
diffstat | 2 files changed, 37 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/nogood.py Mon Apr 13 14:12:12 2020 +0100
+++ b/bin/nogood.py Mon Apr 13 15:24:32 2020 +0100
@@ -3,7 +3,12 @@
 Usage: nogood.py segid numChanged
 Note the slightly counter-intuitive value: 0 if the input is no good'''
 import sys,re
-LONG_ADD=re.compile('[1-9][0-9]*a[1-9][0-9]*,[1-9][0-9]*')
+shortPat='[1-9][0-9]*%s[1-9][0-9]*'
+SHORT_ADD=re.compile(shortPat%'a')
+SHORT_CHANGE=re.compile(shortPat%'c')
+longPat=shortPat+',[1-9][0-9]*'
+LONG_ADD=re.compile(longPat%'a')
+LONG_CHANGE=re.compile(longPat%'c')
 (segment,n)=map(int,sys.argv[1:])
 c=0
 for l in sys.stdin:
@@ -11,10 +16,35 @@
   if LONG_ADD.fullmatch(d):
     c+=1
     continue
-  else:
-    print("non-simple diff at %s: %s for segment %s"%(i,d,segment),file=sys.stderr)
-    exit(0)
+  elif SHORT_ADD.fullmatch(d):
+    # as in 1566027315865.44/40_diff.txt
+    try:
+      (j,e)=sys.stdin.readline().rstrip().split(':')
+    except ValueError:
+      # EOF, I think
+      # OK, maybe
+      c+=1
+      continue
+    if int(j)==int(i)+2 and LONG_CHANGE.fullmatch(e):
+      c+=1
+      continue
+  elif SHORT_CHANGE.fullmatch(d):
+    # as in 1566027315865.44/40_diff.txt
+    try:
+      (j,e)=sys.stdin.readline().rstrip().split(':')
+    except ValueError:
+      # EOF, I think
+      # OK, maybe
+      c+=1
+      continue
+    if int(j)==int(i)+4 and LONG_ADD.fullmatch(e):
+      c+=1
+      continue
+  print("can't handle diff at %s: %s for segment %s"%(i,d,segment),
+        file=sys.stderr)
+  exit(0)
 if c!=n:
-  print("too many diffs? (%s<>%s) for segment %s"%(c,n,segment),file=sys.stderr)
+  print("too many diffs? (%s<>%s) for segment %s"%(c,n,segment),
+        file=sys.stderr)
   exit(0)
 exit(1)
--- a/bin/preExtract.sh Mon Apr 13 14:12:12 2020 +0100
+++ b/bin/preExtract.sh Mon Apr 13 15:24:32 2020 +0100
@@ -84,7 +84,8 @@
 ni=${#ii[@]}
 if [ $(wc -l < ${e}_check.txt) -ne $((ni + 1)) ]
 then
-  if egrep -n '^[1-9]' ${e}_diff.txt | $HOME/bin/nogood.py $e $ni
+  if egrep -n '^[1-9]' ${e}_diff.txt | \
+      $HOME/bin/nogood.py $e $ni 2>> log
   then
     continue
   fi