Mercurial > hg > cc > cirrus_work
annotate bin/test_warc.py @ 119:1d12b51c4d59
minor bug wrt EOF of final cdx input file
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 27 Sep 2023 17:29:51 +0100 |
parents | b88fdbe8bfa7 |
children |
rev | line source |
---|---|
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 import warc,sys |
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 |
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Binary handle on the standard-output file descriptor: showme writes
# bytes, which the text-mode sys.stdout wrapper would not accept.
OUT=open(sys.stdout.fileno(),'wb')

USAGE="usage: test_warc.py [-d] TESTNUM WARCFILE [PARTS]"

# An optional leading -d turns on debug output; it is removed from
# sys.argv so that the remaining arguments keep fixed positions.
# The length check avoids an IndexError when no arguments were given.
if (debug:=(len(sys.argv)>1 and sys.argv[1]=='-d')):
  sys.argv.pop(1)

# The next argument is the number of the test to run; it too is
# removed, leaving the WARC file name as sys.argv[1].
if len(sys.argv)<2:
  sys.exit(USAGE)
try:
  tt=int(sys.argv.pop(1))
except ValueError:
  # Report a non-numeric test number as a usage error, not a traceback
  sys.exit(USAGE)
9 | |
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def showme(wtype,buf,part):
  """Callback handed to warc.warc: copy one piece of a record to OUT.

  wtype -- not used by this function
  buf   -- bytes-like object holding the data to write
  part  -- integer tag for the piece; when it is 7 an extra CRLF is
           written (presumably 7 means the whole record -- confirm
           against warc.py)

  Relies on the module-level names debug and OUT.  When debug is true
  the data is bracketed by start/end marker lines.  Returns OUT.
  """
  # This should exactly reproduce a complete warc file if called
  #  as per version 1 below
  if debug:
    OUT.write(b"----start %d-----\n"%part)
  OUT.write(buf)
  # 10 is LF.  An empty buffer has no last byte to inspect, so test the
  # length first (buf[-1] would raise IndexError) and treat it the
  # same as any other buffer that does not end in a newline.
  if len(buf)==0 or buf[-1]!=10:
    OUT.write(b'\r\n')
  if part==7:
    OUT.write(b'\r\n') # to match complete file formatting
  if debug:
    OUT.write(b"----end %d-----\n"%part)
  return OUT
42
689a0e311cd2
make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
23 |
# Run the test selected by tt.  By this point sys.argv[1] is the WARC
# file name; tests 1, 2 and 5 also read sys.argv[2] as the integer
# passed to warc.warc as parts, while tests 3 and 4 pass whole=True.
# NOTE(review): the type lists mix bytes and str exactly as before --
#  presumably warc.warc accepts both; confirm against warc.py.
if tt==1:
  warc.warc(sys.argv[1],showme,[b'response','warcinfo','request','metadata'],parts=int(sys.argv[2]),debug=debug)
elif tt==2:
  warc.warc(sys.argv[1],showme,[b'warcinfo'],parts=int(sys.argv[2]),debug=debug)
elif tt==3:
  warc.warc(sys.argv[1],showme,[b'warcinfo'],whole=True,debug=debug)
elif tt==4:
  warc.warc(sys.argv[1],showme,[b'response','warcinfo','request','metadata'],whole=True,debug=debug)
elif tt==5:
  warc.warc(sys.argv[1],showme,[b'response'],parts=int(sys.argv[2]),debug=debug)
else:
  # Previously an unrecognised test number did nothing and exited 0
  sys.exit("test_warc.py: unknown test number %d (expected 1-5)"%tt)