changeset 53:0dc144bd027c

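Made 1 mean 1: bob is now set to bp on every loop iteration (after leading CRLF pairs are skipped) rather than once to 0 before the loop, and the parts&1 callback now receives buf[bob:eol] instead of buf[bp:eol+length]. Still losing after a while.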
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Jul 2023 19:04:16 +0100
parents e20c64917805
children 9c63039a9b6d
files bin/warc.py
diffstat 1 files changed, 2 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/bin/warc.py	Fri Jul 07 17:04:05 2023 +0100
+++ b/bin/warc.py	Fri Jul 07 19:04:16 2023 +0100
@@ -18,11 +18,11 @@
   buf=bytearray(bufSize)
   hdrBuf=memoryview(buf)[:hdrMax]
   fpos=bl=stream.readinto(hdrBuf)
-  bob=0
   while True:
     bp=0
     while buf.startswith(b'\r\n',bp):
       bp+=2
+    bob=bp
     if not buf.startswith(b'WARC/1.0\r\n',bp):
       raise ValueError("Not a WARC file? At %s: %s[%s]"%(bp,
                        buf[bp:min(bl,bp+20)].decode('latin-1'), bl-bp))
@@ -48,7 +48,7 @@
         pass # buf[bp:(bp:=bp+ln)]=l
       elif (parts & 1):
         print('cb')
-        OUT=callback(wtype,buf[bp:eol+length],1)
+        OUT=callback(wtype,buf[bob:eol],1)
       if parts!=1:
         # everything from bv= goes here
         # need to read more if eol+length>hdrMax