changeset 66:75f1d3bc60d9

Part 2 output (parts & 2) is now working for all record types: META/INFO as well as request/response
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 19 Jul 2023 13:20:46 +0100
parents b88fdbe8bfa7
children b8d4a5ede7a3
files bin/warc.py
diffstat 1 files changed, 5 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/bin/warc.py	Wed Jul 19 13:19:58 2023 +0100
+++ b/bin/warc.py	Wed Jul 19 13:20:46 2023 +0100
@@ -121,12 +121,15 @@
         eob=bp+length
         while buf.startswith(b'\r\n',eob-2):
           eob-=2
-        bv=bufView[start_2:eob]
         # Only output parts (2 = HTTP header, 4 = body) that are wanted
         if parts & 2:
           if wtype is META or wtype is INFO:
             # rest of the part
-            OUT=callback(wtype,bv,2)
+            OUT=callback(wtype,bufView[start_2:eob],2)
+          else:
+            # request and response have http headers
+            eo2=buf.index(b'\r\n\r\n',start_2)
+            OUT=callback(wtype,bufView[start_2:eo2+2],2)
         if parts & 4:
           for L in rec_text:
             if state==2: