changeset 60:7b68c3ebc35a

tests 1 & 2 now working
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 13 Jul 2023 14:02:02 +0100
parents 5d40d7511374
children f182d09ad1cd
files bin/warc.py
diffstat 1 files changed, 13 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/bin/warc.py	Thu Jul 13 11:28:24 2023 +0100
+++ b/bin/warc.py	Thu Jul 13 14:02:02 2023 +0100
@@ -72,17 +72,25 @@
         keepFrom=start_1
         keepLen=bl-keepFrom
         buf[0:keepLen]=bufView[keepFrom:bl]
+        eol=eol-start_1
+        start_1=0
+        bp=eol+2
       else:
         # we can skip the rest of this part
-        keepLen=0
-        fpos=stream.seek(fpos+(bp+length-bl))
+        if (bp+length)<=bl:
+          # we have at least some bytes from the next part
+          keepLen=bl-(bp+length)
+          buf[0:keepLen]=bufView[bl-keepLen:bl]
+        else:
+          # we don't have all of the bytes from the current part
+          #  so can skip the rest of it
+          keepLen=0
+          fpos=stream.seek(fpos+(bp+length-bl))
+        bp=0
       spaceToFill=bufSize-keepLen
       with memoryview(buf)[keepLen:bufSize] as xBuf:
         nb=stream.readinto(xBuf)
       fpos+=nb
-      eol=eol-start_1
-      start_1=0
-      bp=eol+2
       bl=keepLen+nb
       if nb<spaceToFill:
         done=True