comparison bin/warc.py @ 54:9c63039a9b6d

little steps
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 07 Jul 2023 19:30:23 +0100
parents 0dc144bd027c
children f8c8f79b2532
comparison
equal deleted inserted replaced
53:0dc144bd027c 54:9c63039a9b6d
14 else: 14 else:
15 stream=open(filename,'rb',0) 15 stream=open(filename,'rb',0)
16 bufSize=2*1024*1024 16 bufSize=2*1024*1024
17 hdrMax=16*1024 17 hdrMax=16*1024
18 buf=bytearray(bufSize) 18 buf=bytearray(bufSize)
19 hdrBuf=memoryview(buf)[:hdrMax] 19 with memoryview(buf)[:hdrMax] as hdrBuf:
20 fpos=bl=stream.readinto(hdrBuf) 20 fpos=bl=stream.readinto(hdrBuf)
21 while True: 21 while True:
22 bp=0 22 bp=0
23 while buf.startswith(b'\r\n',bp): 23 while buf.startswith(b'\r\n',bp):
24 bp+=2 24 bp+=2
25 bob=bp 25 bob=bp
45 bp=eol+2 45 bp=eol+2
46 if (wtype in types): 46 if (wtype in types):
47 if whole: 47 if whole:
48 pass # buf[bp:(bp:=bp+ln)]=l 48 pass # buf[bp:(bp:=bp+ln)]=l
49 elif (parts & 1): 49 elif (parts & 1):
50 print('cb')
51 OUT=callback(wtype,buf[bob:eol],1) 50 OUT=callback(wtype,buf[bob:eol],1)
52 if parts!=1: 51 if parts!=1:
53 # everything from bv= goes here 52 # everything from bv= goes here
54 # need to read more if eol+length>hdrMax 53 # need to read more if eol+length>hdrMax
55 pass 54 pass
56 print(wtype,bob,eol,length) 55 print(wtype,bob,bp,eol,length,file=sys.stderr)
57 jumpTo=bp+length 56 jumpTo=bp+length
58 buf[0:hdrMax-jumpTo]=buf[jumpTo:hdrMax] 57 buf[0:jumpTo]=buf[jumpTo:hdrMax]
59 stream.seek(fpos:=fpos+jumpTo) 58 _fpos=stream.seek(fpos:=fpos+jumpTo)
59 print('fp',_fpos,fpos,file=sys.stderr)
60 if done: 60 if done:
61 continue 61 print('finished',file=sys.stderr)
62 n=stream.readinto(memoryview(buf)[hdrMax-jumpTo:hdrMax]) 62 break
63 print('read',n) 63 with memoryview(buf) as mv:
64 n=stream.readinto(mv[hdrMax-jumpTo:hdrMax])
65 print('read',n,file=sys.stderr)
64 if n<jumpTo or n==0: 66 if n<jumpTo or n==0:
65 print('done',n,jumpTo) 67 print('done',n,jumpTo,file=sys.stderr)
66 done=True 68 done=True
67 bp=0 69 bp=0
68 #while not buf.startswith(b'\r\n',bp): 70 #while not buf.startswith(b'\r\n',bp):
69 OUT.write(b"=====\n") 71 OUT.write(b"=====\n")
70 OUT.write(buf[0:100]) 72 OUT.write(buf[0:100])