# HG changeset patch # User Henry S. Thompson # Date 1688753056 -3600 # Node ID 0dc144bd027cc4f47bf9d72e9caeeab25e3e43f0 # Parent e20c649178055b5b4541b43db5cbc7b1ae5a52fc made 1 mean 1, still losing after a while diff -r e20c64917805 -r 0dc144bd027c bin/warc.py --- a/bin/warc.py Fri Jul 07 17:04:05 2023 +0100 +++ b/bin/warc.py Fri Jul 07 19:04:16 2023 +0100 @@ -18,11 +18,11 @@ buf=bytearray(bufSize) hdrBuf=memoryview(buf)[:hdrMax] fpos=bl=stream.readinto(hdrBuf) - bob=0 while True: bp=0 while buf.startswith(b'\r\n',bp): bp+=2 + bob=bp if not buf.startswith(b'WARC/1.0\r\n',bp): raise ValueError("Not a WARC file? At %s: %s[%s]"%(bp, buf[bp:min(bl,bp+20)].decode('latin-1'), bl-bp)) @@ -48,7 +48,7 @@ pass # buf[bp:(bp:=bp+ln)]=l elif (parts & 1): print('cb') - OUT=callback(wtype,buf[bp:eol+length],1) + OUT=callback(wtype,buf[bob:eol],1) if parts!=1: # everything from bv= goes here # need to read more if eol+length>hdrMax