Mercurial > hg > cc > cirrus_work
changeset 282:0267374361f4
type decls, cythonize works
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 07 Mar 2025 15:39:36 +0000 |
parents | 25d49e1f6c1d |
children | 6739e08d19ff |
files | lib/python/cc/lmh/lmh.py |
diffstat | 1 files changed, 18 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/python/cc/lmh/lmh.py	Wed Mar 05 23:29:25 2025 +0000
+++ b/lib/python/cc/lmh/lmh.py	Fri Mar 07 15:39:36 2025 +0000
@@ -7,16 +7,27 @@
 
 
 import re,warc,sys,glob,codecs,os.path
+import cython, typing
 
-TUPAT=re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE)
-DPAT=re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE)
-LMPAT=re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE)
+TUPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE)
+DPAT: typing.Pattern[cython.bytes] = re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE)
+LMPAT: typing.Pattern[cython.bytes] = re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE)
+
+DTAB: cython.bytes = bytearray(range(256))
+DDEL: cython.bytes = b'TZ-:'
 
-DTAB=bytearray(range(256))
-DDEL=b'TZ-:'
+URI: cython.bytes
+DATE: cython.bytes
+EXTRAS: bool
+SEGMENT: cython.bytes
+FILETYPE: cython.bytes
+FILENO: cython.bytes
+OUT: typing.BinaryIO
 
-def showmeLMH(wtype,buf,part):
+def showmeLMH(wtype: cython.bytes, buf: char[::1] , part: int) -> None:
   global URI, DATE, SEGMENT, FILETYPE, FILENO
+  m: typing.Match[cython.bytes] | None
+  mm: typing.Match[cython.bytes] | None
   if part==1:
     if (m:=TUPAT.search(buf)):
       URI=m[1]
@@ -51,7 +62,7 @@
   try:
     infile_name=glob.glob(infile_pat)[0]
   except IndexError:
-    print(infile_pat,CCdata,segment,filetype,fileno,file=sys.stderr)
+    print(infile_pat,CCdate,segment,filetype,fileno,file=sys.stderr)
     raise
 
   (_,_,_,_,_,_,_,ff)=infile_name.split('/')