Mercurial > hg > cc > cirrus_home
annotate bin/nogood.py @ 52:9cd9daf75183
working towards more robust diff checking
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 13 Apr 2020 14:12:12 +0100 |
parents | |
children | 396d1f17c671 |
rev | line source |
---|---|
52
9cd9daf75183
working towards more robust diff checking
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
#!/lustre/sw/miniconda3/bin/python3
'''Detect bad diffs between warc.sh log file and existing extract_...tar

Usage: nogood.py segid numChanged

Reads lines of the form "<index>:<diff command>" from standard input.
The input is good only if every line carries a ranged "add" command
(see LONG_ADD) and the number of such lines equals numChanged.

Note the slightly counter-intuitive value: 0 if the input is no good'''
import re
import sys

# Digits, 'a', digits, ',', digits -- e.g. "12a13,20".  This looks like the
# ranged "add" command of normal-format diff output (presumably that is what
# the upstream pipeline feeds in -- confirm against warc.sh).
LONG_ADD = re.compile('[1-9][0-9]*a[1-9][0-9]*,[1-9][0-9]*')


def check_diffs(lines, segment, n, err=None):
    '''Classify a stream of "<index>:<diff command>" lines.

    lines   -- iterable of text lines (trailing whitespace is ignored)
    segment -- segment id, used only in diagnostics
    n       -- expected number of LONG_ADD lines
    err     -- file-like object for diagnostics (defaults to sys.stderr)

    Returns the process exit status: 1 if every line is a LONG_ADD and
    exactly n lines were seen, 0 (no good) otherwise.  Stops reading at
    the first offending line.'''
    if err is None:
        err = sys.stderr
    c = 0
    for l in lines:
        # partition() never raises, unlike unpacking split(':').  An
        # unparseable line used to escape as a ValueError, whose exit
        # status of 1 is indistinguishable from "input is good".
        (i, sep, d) = l.rstrip().partition(':')
        if not sep:
            print("malformed diff line %r for segment %s" % (i, segment),
                  file=err)
            return 0
        if LONG_ADD.fullmatch(d):
            c += 1
            continue
        print("non-simple diff at %s: %s for segment %s" % (i, d, segment),
              file=err)
        return 0
    if c != n:
        # NOTE: the wording is kept as-is, but this also fires when there
        # are fewer diffs than expected.
        print("too many diffs? (%s<>%s) for segment %s" % (c, n, segment),
              file=err)
        return 0
    return 1


def main(argv):
    '''Parse "segid numChanged" from argv and check standard input.'''
    (segment, n) = map(int, argv[1:])
    return check_diffs(sys.stdin, segment, n)


if __name__ == '__main__':
    # sys.exit rather than the site-provided exit(), which is meant for
    # the interactive shell and is missing under "python -S".
    sys.exit(main(sys.argv))