annotate bin/test_warc.py @ 119:1d12b51c4d59

minor bug wrt EOF of final cdx input file
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 27 Sep 2023 17:29:51 +0100
parents b88fdbe8bfa7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 import warc,sys
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 OUT=open(sys.stdout.fileno(),'wb')
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 if (debug:=(sys.argv[1]=='-d')):
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 sys.argv.pop(1)
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7
63
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
8 tt=int(sys.argv.pop(1))
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
9
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 def showme(wtype,buf,part):
45
212da3fe3b19 make test 1 idempotent
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 42
diff changeset
11 # This should exactly reproduce a complete warc file if called
212da3fe3b19 make test 1 idempotent
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 42
diff changeset
12 # as per version 1 below
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 if debug:
52
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
14 OUT.write(b"----start %d-----\n"%part)
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
15 OUT.write(buf)
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
16 if buf[-1]!=10:
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
17 OUT.write(b'\r\n')
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
18 if part==7:
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
19 OUT.write(b'\r\n') # to match complete file formatting
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
20 if debug:
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
21 OUT.write(b"----end %d-----\n"%part)
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
22 return OUT
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23
63
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
24 if tt==1:
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
25 warc.warc(sys.argv[1],showme,[b'response','warcinfo','request','metadata'],parts=int(sys.argv[2]),debug=debug)
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
26 elif tt==2:
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
27 warc.warc(sys.argv[1],showme,[b'warcinfo'],parts=int(sys.argv[2]),debug=debug)
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
28 elif tt==3:
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
29 warc.warc(sys.argv[1],showme,[b'warcinfo'],whole=True,debug=debug)
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
30 elif tt==4:
9837840f3328 more tests
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 52
diff changeset
31 warc.warc(sys.argv[1],showme,[b'response','warcinfo','request','metadata'],whole=True,debug=debug)
65
b88fdbe8bfa7 add a response-only test
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 63
diff changeset
32 elif tt==5:
b88fdbe8bfa7 add a response-only test
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 63
diff changeset
33 warc.warc(sys.argv[1],showme,[b'response'],parts=int(sys.argv[2]),debug=debug)