# HG changeset patch # User Henry S. Thompson # Date 1741361976 0 # Node ID 0267374361f49cfe7dfeadfcb4ac848e5582a3bc # Parent 25d49e1f6c1d1488b540d94783017141d5a2e76b type decls, cythonize works diff -r 25d49e1f6c1d -r 0267374361f4 lib/python/cc/lmh/lmh.py --- a/lib/python/cc/lmh/lmh.py Wed Mar 05 23:29:25 2025 +0000 +++ b/lib/python/cc/lmh/lmh.py Fri Mar 07 15:39:36 2025 +0000 @@ -7,16 +7,27 @@ import re,warc,sys,glob,codecs,os.path +import cython, typing -TUPAT=re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE) -DPAT=re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE) -LMPAT=re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE) +TUPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE) +DPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE) +LMPAT: typing.Pattern[cython.bytes] = re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE) + +DTAB: cython.bytes = bytearray(range(256)) +DDEL: cython.bytes = b'TZ-:' -DTAB=bytearray(range(256)) -DDEL=b'TZ-:' +URI: cython.bytes +DATE: cython.bytes +EXTRAS: bool +SEGMENT: cython.bytes +FILETYPE: cython.bytes +FILENO: cython.bytes +OUT: typing.BinaryIO -def showmeLMH(wtype,buf,part): +def showmeLMH(wtype: cython.bytes, buf: char[::1] , part: int) -> None: global URI, DATE, SEGMENT, FILETYPE, FILENO + m: typing.Match[cython.bytes] | None + mm: typing.Match[cython.bytes] | None if part==1: if (m:=TUPAT.search(buf)): URI=m[1] @@ -51,7 +62,7 @@ try: infile_name=glob.glob(infile_pat)[0] except IndexError: - print(infile_pat,CCdata,segment,filetype,fileno,file=sys.stderr) + print(infile_pat,CCdate,segment,filetype,fileno,file=sys.stderr) raise (_,_,_,_,_,_,_,ff)=infile_name.split('/')