Mercurial > hg > cc > cirrus_work
comparison bin/warc.py @ 54:9c63039a9b6d
little steps
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 07 Jul 2023 19:30:23 +0100 |
parents | 0dc144bd027c |
children | f8c8f79b2532 |
comparison
equal
deleted
inserted
replaced
53:0dc144bd027c | 54:9c63039a9b6d |
---|---|
14 else: | 14 else: |
15 stream=open(filename,'rb',0) | 15 stream=open(filename,'rb',0) |
16 bufSize=2*1024*1024 | 16 bufSize=2*1024*1024 |
17 hdrMax=16*1024 | 17 hdrMax=16*1024 |
18 buf=bytearray(bufSize) | 18 buf=bytearray(bufSize) |
19 hdrBuf=memoryview(buf)[:hdrMax] | 19 with memoryview(buf)[:hdrMax] as hdrBuf: |
20 fpos=bl=stream.readinto(hdrBuf) | 20 fpos=bl=stream.readinto(hdrBuf) |
21 while True: | 21 while True: |
22 bp=0 | 22 bp=0 |
23 while buf.startswith(b'\r\n',bp): | 23 while buf.startswith(b'\r\n',bp): |
24 bp+=2 | 24 bp+=2 |
25 bob=bp | 25 bob=bp |
45 bp=eol+2 | 45 bp=eol+2 |
46 if (wtype in types): | 46 if (wtype in types): |
47 if whole: | 47 if whole: |
48 pass # buf[bp:(bp:=bp+ln)]=l | 48 pass # buf[bp:(bp:=bp+ln)]=l |
49 elif (parts & 1): | 49 elif (parts & 1): |
50 print('cb') | |
51 OUT=callback(wtype,buf[bob:eol],1) | 50 OUT=callback(wtype,buf[bob:eol],1) |
52 if parts!=1: | 51 if parts!=1: |
53 # everything from bv= goes here | 52 # everything from bv= goes here |
54 # need to read more if eol+length>hdrMax | 53 # need to read more if eol+length>hdrMax |
55 pass | 54 pass |
56 print(wtype,bob,eol,length) | 55 print(wtype,bob,bp,eol,length,file=sys.stderr) |
57 jumpTo=bp+length | 56 jumpTo=bp+length |
58 buf[0:hdrMax-jumpTo]=buf[jumpTo:hdrMax] | 57 buf[0:jumpTo]=buf[jumpTo:hdrMax] |
59 stream.seek(fpos:=fpos+jumpTo) | 58 _fpos=stream.seek(fpos:=fpos+jumpTo) |
59 print('fp',_fpos,fpos,file=sys.stderr) | |
60 if done: | 60 if done: |
61 continue | 61 print('finished',file=sys.stderr) |
62 n=stream.readinto(memoryview(buf)[hdrMax-jumpTo:hdrMax]) | 62 break |
63 print('read',n) | 63 with memoryview(buf) as mv: |
64 n=stream.readinto(mv[hdrMax-jumpTo:hdrMax]) | |
65 print('read',n,file=sys.stderr) | |
64 if n<jumpTo or n==0: | 66 if n<jumpTo or n==0: |
65 print('done',n,jumpTo) | 67 print('done',n,jumpTo,file=sys.stderr) |
66 done=True | 68 done=True |
67 bp=0 | 69 bp=0 |
68 #while not buf.startswith(b'\r\n',bp): | 70 #while not buf.startswith(b'\r\n',bp): |
69 OUT.write(b"=====\n") | 71 OUT.write(b"=====\n") |
70 OUT.write(buf[0:100]) | 72 OUT.write(buf[0:100]) |