changeset 282:0267374361f4

type decls, cythonize works
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Mar 2025 15:39:36 +0000
parents 25d49e1f6c1d
children 6739e08d19ff
files lib/python/cc/lmh/lmh.py
diffstat 1 files changed, 18 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/lmh.py	Wed Mar 05 23:29:25 2025 +0000
+++ b/lib/python/cc/lmh/lmh.py	Fri Mar 07 15:39:36 2025 +0000
@@ -7,16 +7,27 @@
 
 
 import re,warc,sys,glob,codecs,os.path
+import cython, typing
 
-TUPAT=re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE)
-DPAT=re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE)
-LMPAT=re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE)
+TUPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE)
+DPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE)
+LMPAT: typing.Pattern[cython.bytes] = re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE)
+
+DTAB: cython.bytes = bytearray(range(256))
+DDEL: cython.bytes = b'TZ-:'
 
-DTAB=bytearray(range(256))
-DDEL=b'TZ-:'
+URI: cython.bytes
+DATE: cython.bytes
+EXTRAS: bool
+SEGMENT: cython.bytes
+FILETYPE: cython.bytes
+FILENO: cython.bytes
+OUT: typing.BinaryIO
 
-def showmeLMH(wtype,buf,part):
+def showmeLMH(wtype: cython.bytes, buf: char[::1] , part: int) -> None:
   global URI, DATE, SEGMENT, FILETYPE, FILENO
+  m: typing.Match[cython.bytes] | None
+  mm: typing.Match[cython.bytes] | None
   if part==1:
     if (m:=TUPAT.search(buf)):
       URI=m[1]
@@ -51,7 +62,7 @@
   try:
     infile_name=glob.glob(infile_pat)[0]
   except IndexError:
-    print(infile_pat,CCdata,segment,filetype,fileno,file=sys.stderr)
+    print(infile_pat,CCdate,segment,filetype,fileno,file=sys.stderr)
     raise
 
   (_,_,_,_,_,_,_,ff)=infile_name.split('/')