annotate bin/test_warc.py @ 52:e20c64917805

better debugging output
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Jul 2023 17:04:05 +0100
parents 212da3fe3b19
children 9837840f3328
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 import warc,sys
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Binary-mode handle on the stdout file descriptor: everything this script
# emits is written as bytes.
OUT=open(sys.stdout.fileno(),'wb')

# An optional leading -d turns on debug output.  It is removed from sys.argv
# so that the remaining arguments keep the same positions either way.
# Comparing a slice (rather than indexing sys.argv[1]) keeps this from
# raising IndexError when the script is started with no arguments at all.
debug=(sys.argv[1:2]==['-d'])
if debug:
    sys.argv.pop(1)
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
def showme(wtype,buf,part):
    """Write one chunk handed over by warc.warc to OUT and return OUT.

    This should exactly reproduce a complete warc file if called
    as per version 1 below.

    wtype -- record type passed in by the caller; not used here
    buf   -- bytes-like chunk to emit (bytes, bytearray or memoryview)
    part  -- integer part code passed in by the caller; shown in the debug
             markers, and the value 7 gets an extra trailing CRLF
             (presumably 7 means the complete record -- confirm in warc.py)

    When the module-level debug flag is set, the chunk is bracketed by
    "----start N-----" / "----end N-----" marker lines.

    NOTE(review): the indentation of this function could not be recovered
    from the annotated listing; every `if` below is assumed to be an
    independent, un-nested test -- confirm against the repository copy.
    """
    if debug:
        OUT.write(b"----start %d-----\n"%part)
    OUT.write(buf)
    # Compare a one-byte slice instead of indexing buf[-1]: an empty chunk
    # then counts as "not newline-terminated" rather than raising IndexError.
    if buf[-1:]!=b'\n':
        OUT.write(b'\r\n')
    if part==7:
        OUT.write(b'\r\n') # to match complete file formatting
    if debug:
        OUT.write(b"----end %d-----\n"%part)
    return OUT
42
689a0e311cd2 make warc.py a library, separate out testing
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21
52
e20c64917805 better debugging output
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 45
diff changeset
# 1
# Two positional arguments are needed once any -d has been removed: the
# input passed to warc.warc and the integer parts value.  Fail with a usage
# line instead of a bare IndexError when either is missing.
if len(sys.argv)<3:
    sys.exit("usage: %s [-d] FILE PARTS"%sys.argv[0])
# All record types are given as bytes, in agreement with b'response' and with
# version 2 below (the original mixed one bytes literal with three str ones).
# NOTE(review): assumes warc.warc takes bytes type names -- confirm in warc.py
warc.warc(sys.argv[1],showme,[b'response',b'warcinfo',b'request',b'metadata'],parts=int(sys.argv[2]),debug=debug)

# 2
#warc.warc(sys.argv[1],showme,[b'warcinfo'],parts=int(sys.argv[2]),debug=debug)