changeset 283:6739e08d19ff

type decls, cythonize works
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Mar 2025 18:15:41 +0000
parents 0267374361f4
children e461601592dd
files lib/python/cc/warc.py
diffstat 1 files changed, 21 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/warc.py	Fri Mar 07 15:39:36 2025 +0000
+++ b/lib/python/cc/warc.py	Fri Mar 07 18:15:41 2025 +0000
@@ -5,29 +5,35 @@
 
 import sys, io
 from isal import igzip
+import cython, typing
 
-RESP = b'response'
-REQ = b'request'
-META = b'metadata'
-INFO = b'warcinfo'
+
+RESP: cython.bytes = b'response'
+REQ: cython.bytes = b'request'
+META: cython.bytes = b'metadata'
+INFO: cython.bytes = b'warcinfo'
 
-BUFSIZE=2*1024*1024
-HDRMAX=32*1024  # Not really max, there are some enormous ones, see below
+BUFSIZE: int = 2*1024*1024
+HDRMAX: int = 32*1024  # Not really max, there are some enormous ones, see below
 
-def refill(buf, bufView, stream, start_1, bl, bp, eol, length, needed):
+def refill(buf: char[::1], bufView: char[::1], stream: typing.BinaryIO,
+           start_1: int, bl: int, bp: int, eol: int,
+        length: int, needed: bool) -> (int, int, int, cython.bytes, int, char[::1], bool):
   global BUFSIZE
+  whole: int
+  xBuf: char[::1]
   #if (stream.tell() > 2381000000):
   #  breakpoint()
   if needed:
     # we need to keep from start_1 to bl
-    keepFrom=start_1
-    keepLen=bl-keepFrom
+    keepFrom: int = start_1
+    keepLen: int = bl-keepFrom
     if (whole:=((bp-start_1)+length)) > BUFSIZE:
       while whole > BUFSIZE:
         # Need a bigger buffer
         print('Growing buffer %s > %s'%(whole,BUFSIZE),file=sys.stderr)
         BUFSIZE=BUFSIZE+(64 * 1024)
-      newbuf = bytearray(BUFSIZE)
+      newbuf: char[::1] = bytearray(BUFSIZE)
       newbuf[0:keepLen]=bufView[keepFrom:bl]
       bl = BUFSIZE
       buf = newbuf
@@ -49,7 +55,7 @@
       keepLen=0
       stream.seek(stream.tell() + bp + length - bl)
     bp=0
-  spaceToFill=BUFSIZE-keepLen
+  spaceToFill: int = BUFSIZE-keepLen
   with memoryview(buf)[keepLen:BUFSIZE] as xBuf:
     nb=stream.readinto(xBuf)
   bl=keepLen+nb
@@ -166,6 +172,8 @@
             eo2=buf.index(b'\r\n\r\n',start_2)
             OUT=callback(wtype,bufView[start_2:eo2+2],2)
         if parts & 4:
+          # NOTE: code below may be stale; rec_text is emptied on the next line, so this loop is skipped
+          rec_text = []
           for L in rec_text:
             if state==2:
               # HTTP header
@@ -186,7 +194,8 @@
                 if bl is not None:
                   if bl!=wl:
                     print("length mismatch: %s %s %s here: %s given: %s trunc: %s"%\
-                          (length,offset,filename,wl,bl,tr),file=sys.stderr)
+                          (length,#offset,
+                           filename,wl,bl,tr),file=sys.stderr)
                 # HTTP body
                 balance=start_2+rec_text.tell()
                 #print(balance,bl,wl,ll,ll-balance,file=sys.stderr)