# HG changeset patch # User Henry S. Thompson # Date 1689769246 -3600 # Node ID 75f1d3bc60d9c2c4a053a505d723312a83be6f1c # Parent b88fdbe8bfa713caa6d0f52f52d45b0e4455df29 part 2 is now working for all types diff -r b88fdbe8bfa7 -r 75f1d3bc60d9 bin/warc.py --- a/bin/warc.py Wed Jul 19 13:19:58 2023 +0100 +++ b/bin/warc.py Wed Jul 19 13:20:46 2023 +0100 @@ -121,12 +121,15 @@ eob=bp+length while buf.startswith(b'\r\n',eob-2): eob-=2 - bv=bufView[start_2:eob] # Only output parts (2 = HTTP header, 4 = body) that are wanted if parts & 2: if wtype is META or wtype is INFO: # rest of the part - OUT=callback(wtype,bv,2) + OUT=callback(wtype,bufView[start_2:eob],2) + else: + # request and response have http headers + eo2=buf.index(b'\r\n\r\n',start_2) + OUT=callback(wtype,bufView[start_2:eo2+2],2) if parts & 4: for L in rec_text: if state==2: