Mercurial > hg > cc > cirrus_home
diff bin/nogood.py @ 52:9cd9daf75183
working towards more robust diff checking
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 13 Apr 2020 14:12:12 +0100 |
parents | |
children | 396d1f17c671 |
line wrap: on
line diff
#!/lustre/sw/miniconda3/bin/python3
'''Detect bad diffs between warc.sh log file and existing extract_...tar
Usage: nogood.py segid numChanged
Note the slightly counter-intuitive value: 0 if the input is no good

Each line on stdin is expected to look like "<where>:<diff command>".
The input is good (exit status 1) only when every diff command is a
multi-line add ("NaM,K") and the number of such lines equals numChanged.

Because exit status 1 means "good" here, and Python also exits with 1 on
an uncaught exception, every failure path (malformed input line, bad
command-line arguments) is reported explicitly with status 0 so that a
crash can never be mistaken for success.'''
import re
import sys

# A diff "add" command covering a range of new lines, e.g. "12a13,20".
LONG_ADD = re.compile(r'[1-9][0-9]*a[1-9][0-9]*,[1-9][0-9]*')


def check_diffs(lines, segment, n):
    '''Check that lines holds exactly n multi-line-add diff commands.

    lines   -- iterable of "<where>:<diff command>" strings
    segment -- segment id, used only in the diagnostic message
    n       -- the number of diff lines expected

    Returns (status, message): (1, None) if the input is good, otherwise
    (0, <diagnostic string>).  Checking stops at the first bad line.'''
    count = 0
    for line in lines:
        # partition never raises: a line with no colon yields an empty
        # command, and one with extra colons keeps them in the command.
        # Neither can match LONG_ADD, so both are reported as non-simple.
        where, _, command = line.rstrip().partition(':')
        if not LONG_ADD.fullmatch(command):
            return 0, ("non-simple diff at %s: %s for segment %s"
                       % (where, command, segment))
        count += 1
    if count != n:
        return 0, ("too many diffs? (%s<>%s) for segment %s"
                   % (count, n, segment))
    return 1, None


def main(argv=None, stdin=None):
    '''Run the check on argv/stdin and return the process exit status.

    argv and stdin default to sys.argv and sys.stdin.  Returns 0 if the
    arguments are unusable or the input is no good, 1 if it is good.'''
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin
    try:
        (segment, n) = map(int, argv[1:])
    except ValueError:
        # Wrong number of arguments or a non-integer argument.
        print("Usage: nogood.py segid numChanged", file=sys.stderr)
        return 0
    status, message = check_diffs(stdin, segment, n)
    if message is not None:
        print(message, file=sys.stderr)
    return status


if __name__ == '__main__':
    sys.exit(main())