Mercurial > hg > cc > cirrus_work
changeset 54:9c63039a9b6d
little steps
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 07 Jul 2023 19:30:23 +0100 |
parents | 0dc144bd027c |
children | 11a886a84a49 |
files | bin/warc.py |
diffstat | 1 files changed, 12 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bin/warc.py Fri Jul 07 19:04:16 2023 +0100
+++ b/bin/warc.py Fri Jul 07 19:30:23 2023 +0100
@@ -16,8 +16,8 @@
  bufSize=2*1024*1024
  hdrMax=16*1024
  buf=bytearray(bufSize)
- hdrBuf=memoryview(buf)[:hdrMax]
- fpos=bl=stream.readinto(hdrBuf)
+ with memoryview(buf)[:hdrMax] as hdrBuf:
+ fpos=bl=stream.readinto(hdrBuf)
  while True:
  bp=0
  while buf.startswith(b'\r\n',bp):
@@ -47,22 +47,24 @@
  if whole:
  pass # buf[bp:(bp:=bp+ln)]=l
  elif (parts & 1):
- print('cb')
  OUT=callback(wtype,buf[bob:eol],1)
  if parts!=1:
  # everything from bv= goes here
  # need to read more if eol+length>hdrMax
  pass
- print(wtype,bob,eol,length)
+ print(wtype,bob,bp,eol,length,file=sys.stderr)
  jumpTo=bp+length
- buf[0:hdrMax-jumpTo]=buf[jumpTo:hdrMax]
- stream.seek(fpos:=fpos+jumpTo)
+ buf[0:jumpTo]=buf[jumpTo:hdrMax]
+ _fpos=stream.seek(fpos:=fpos+jumpTo)
+ print('fp',_fpos,fpos,file=sys.stderr)
  if done:
- continue
- n=stream.readinto(memoryview(buf)[hdrMax-jumpTo:hdrMax])
- print('read',n)
+ print('finished',file=sys.stderr)
+ break
+ with memoryview(buf) as mv:
+ n=stream.readinto(mv[hdrMax-jumpTo:hdrMax])
+ print('read',n,file=sys.stderr)
  if n<jumpTo or n==0:
- print('done',n,jumpTo)
+ print('done',n,jumpTo,file=sys.stderr)
  done=True
  bp=0
  #while not buf.startswith(b'\r\n',bp):
```