Mercurial > hg > cc > cirrus_work
annotate bin/test_warc.py @ 45:212da3fe3b19
make test 1 idempotent
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 05 Jul 2023 19:32:02 +0100 |
parents | 689a0e311cd2 |
children | e20c64917805 |
rev | line source |
---|---|
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 import warc,sys |
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 |
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Binary-mode handle on the stdout file descriptor; showme writes bytes to it
OUT = open(sys.stdout.fileno(), 'wb')

# A leading -d turns on debug output; remove it so the remaining
# arguments keep the positions the rest of the script expects
debug = sys.argv[1] == '-d'
if debug:
    del sys.argv[1]
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 |
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def showme(wtype,buf,part):
    """Write one buffer to OUT, optionally tagged with its part number.

    This function is handed to warc.warc below; presumably it is invoked
    once per selected record or record part (confirm against warc.py).

    wtype -- accepted for the call signature, not used here
    buf   -- bytes-like data to write
    part  -- integer part code; 7 gets an extra trailing CRLF

    When the module-level debug flag is set, the part number is written on
    a line of its own followed by buf, with nothing appended.
    """
    # This should exactly reproduce a complete warc file if called
    #  as per version 1 below
    # NOTE(review): nesting below was reconstructed from a listing that
    #  had lost its indentation -- confirm against the repository copy
    if debug:
        OUT.write(b"%d\n%b"%(part,buf))
    else:
        OUT.write(buf)
        # Compare a one-byte slice rather than buf[-1]: indexing a
        #  bytes-like object gives an int, which never equals b'\n', so
        #  the CRLF would be appended even after a trailing newline.
        #  The slice is also safe when buf is empty.
        if buf[-1:]!=b'\n':
            OUT.write(b'\r\n')
        if part==7:
            OUT.write(b'\r\n') # to match complete file formatting
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
19 |
45 | 20 # 1 warc.warc(sys.argv[1],showme,[b'response','warcinfo','request','metadata'],whole=True) |
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
21 |
45 | 22 # 2 |
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Version 2: call warc.warc on the file named by the first remaining
# command-line argument, passing showme and [b'response'], with parts
# taken from the second argument as an integer and the debug flag
# forwarded unchanged.
warc.warc(sys.argv[1],showme,[b'response'],parts=int(sys.argv[2]),debug=debug)