changeset 54:9c63039a9b6d

little steps
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Jul 2023 19:30:23 +0100
parents 0dc144bd027c
children 11a886a84a49
files bin/warc.py
diffstat 1 files changed, 12 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/bin/warc.py	Fri Jul 07 19:04:16 2023 +0100
+++ b/bin/warc.py	Fri Jul 07 19:30:23 2023 +0100
@@ -16,8 +16,8 @@
   bufSize=2*1024*1024
   hdrMax=16*1024
   buf=bytearray(bufSize)
-  hdrBuf=memoryview(buf)[:hdrMax]
-  fpos=bl=stream.readinto(hdrBuf)
+  with memoryview(buf)[:hdrMax] as hdrBuf:
+    fpos=bl=stream.readinto(hdrBuf)
   while True:
     bp=0
     while buf.startswith(b'\r\n',bp):
@@ -47,22 +47,24 @@
       if whole:
         pass # buf[bp:(bp:=bp+ln)]=l
       elif (parts & 1):
-        print('cb')
         OUT=callback(wtype,buf[bob:eol],1)
       if parts!=1:
         # everything from bv= goes here
         # need to read more if eol+length>hdrMax
         pass
-    print(wtype,bob,eol,length)
+    print(wtype,bob,bp,eol,length,file=sys.stderr)
     jumpTo=bp+length
-    buf[0:hdrMax-jumpTo]=buf[jumpTo:hdrMax]
-    stream.seek(fpos:=fpos+jumpTo)
+    buf[0:jumpTo]=buf[jumpTo:hdrMax]
+    _fpos=stream.seek(fpos:=fpos+jumpTo)
+    print('fp',_fpos,fpos,file=sys.stderr)
     if done:
-      continue
-    n=stream.readinto(memoryview(buf)[hdrMax-jumpTo:hdrMax])
-    print('read',n)
+      print('finished',file=sys.stderr)
+      break
+    with memoryview(buf) as mv:
+      n=stream.readinto(mv[hdrMax-jumpTo:hdrMax])
+    print('read',n,file=sys.stderr)
     if n<jumpTo or n==0:
-      print('done',n,jumpTo)
+      print('done',n,jumpTo,file=sys.stderr)
       done=True
     bp=0
     #while not buf.startswith(b'\r\n',bp):