view lib/python/cc/unpackz.py @ 264:7886d7de5eed

use cdb library directly, sequestration of cdb handle complete and working, nndb counts two loops now, one with and one without counting successes
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 31 Jan 2025 13:31:02 +0000
parents ce5b2c1da222
children
line wrap: on
line source

#!/usr/bin/env python3
'''See https://stackoverflow.com/a/37042747/2595465
Usage: unpackz.py [-o response-out-file][-b buffer-size] infile
'''
import sys
import isal.isal_zlib
BUFSIZE = 1048576

def unpackz(infileName, callback, outfile = None):
  '''Decompress a file stream by stream, reporting each stream to callback.

  The file is opened in binary mode and read in chunks of up to BUFSIZE
  bytes, which are fed to an isal_zlib decompressor created with wbits=31
  (presumably: concatenated gzip members -- confirm against the inputs
  actually used).  Whenever the decompressor reports unused_data, callback
  is invoked for what has been decompressed so far and a fresh decompressor
  is started on those leftover bytes.

  infileName -- path of the file to read
  callback   -- called as callback(count, offset, data, outfile); count and
                offset are byte counts derived from the compressed input
                (offset is the running sum of the counts reported by the
                per-stream calls), data is decompressed output
  outfile    -- handed to callback unchanged; not otherwise used here

  Returns None.
  '''
  offset = 0    # sum of the counts passed to callback so far
  obuf_len = 0  # length of the leftover bytes carried over from the previous stream
  nbuf = lastbuf = False  # nbuf is only read by the commented-out debug print below
  with open(infileName,'rb') as f:
    z = isal.isal_zlib.decompressobj(31)
    count = 0
    prev_buf = buf = got = None # Keep the compiler happy
    ogot = None   # output of earlier decompress() calls for the current stream, if any
    while True:
      # No leftover reported: either nothing has been decompressed yet, or
      # the current decompressor consumed all of buf.
      if z.unused_data == b"": 
        #print('n', obuf_len, file=sys.stderr)
        # Stash the latest output before got is overwritten below.  On the
        # very first pass got is still None, so ogot stays None.
        if ogot is not None:
          ogot = ogot + got
        else:
          ogot = got
        if lastbuf:  # buf == b"":
          # The previous read was short, so no more input is expected:
          # make the final report and stop.
          # NOTE(review): this passes got (the most recent piece) and
          # obuf_len rather than the accumulated ogot and a count that
          # includes the last read -- confirm this is intended when the
          # final stream does not lie wholly inside the leftover bytes.
          callback(obuf_len, offset, got, outfile)
          # NOTE(review): count starts at 0 and is reset to 0 at the end of
          # every pass through the else branch, so this message looks
          # unreachable -- confirm.
          if count!=0:
            print("Unused data: count=%s offset=%s ?"%(count, offset),
                  file=sys.stderr)
          break
        buf = f.read(BUFSIZE)
        nbuf = True
        lastbuf = ((truesize:=len(buf)) < BUFSIZE) # will only succeed if now at EOF
      else:
        # The decompressor left bytes unused: report what it produced and
        # restart on the leftover.
        buf_len = len(buf)
        #print('b', obuf_len, buf_len, len(z.unused_data), len(buf)-len(z.unused_data),
        #      nbuf, lastbuf, file=sys.stderr)
        # Bytes of buf that were consumed, plus the carried-over leftover
        # length when buf is as long as the chunk most recently read from
        # the file (presumably the test for "buf is a fresh read, so the
        # stream began in the previous leftover").
        # NOTE(review): seems to assume a stream spans at most the leftover
        # plus one fresh read; chunks consumed whole in between are not
        # added -- confirm.
        count = (obuf_len if (buf_len == truesize) else 0) + \
                (len(buf)-len(z.unused_data))
        #if (offset == 1352249):
        #  breakpoint()
        callback(count, offset, got if ogot is None else ogot + got, outfile)
        ogot = None
        offset += count
        count = 0
        # Carry the unconsumed tail forward as input for a new decompressor.
        buf = z.unused_data
        obuf_len = len(buf)
        nbuf = False
        z = isal.isal_zlib.decompressobj(31)
      # Feed the current buffer (fresh read or leftover) to the decompressor.
      got = z.decompress(buf)

def printVal(count,offset,data,outfile):
  '''Default callback for unpackz: report one (count, offset, data) triple.

  With an outfile, write a NUL-delimited header holding count and offset
  in decimal, followed by the raw data bytes.  Without one, just print
  count and offset to standard output and ignore data.
  '''
  if outfile is not None:
    header = b'\000%d\000%d\000'%(count, offset)
    outfile.write(header)
    outfile.write(data)
    return
  print(count, offset)

if __name__ == '__main__':
  # Command line: unpackz.py [-o response-out-file][-b buffer-size] infile
  # The options are only recognised in that order, immediately after the
  # program name; each one is popped from sys.argv once handled, so the
  # input file always ends up as sys.argv[1].
  outfile = None
  if len(sys.argv) > 1 and sys.argv[1] == '-o':
    sys.argv.pop(1)
    # Need both the output file name and the input file name to remain
    if len(sys.argv)>=3:
      outfile = open(sys.argv.pop(1),'wb')
    else:
      print('need an outfile', file=sys.stderr)
      sys.exit(1)
  try:
    if len(sys.argv) > 1 and sys.argv[1] == '-b':
      sys.argv.pop(1)
      # Exactly the buffer size and the input file name must remain
      if len(sys.argv)==3:
        try:
          requested = int(sys.argv.pop(1))
        except ValueError:
          print('buffer length must be an integer', file=sys.stderr)
          sys.exit(2)
        if requested <= 0:
          # A non-positive size would make the read loop in unpackz
          # never see a short read, so it would not terminate
          print('buffer length must be positive', file=sys.stderr)
          sys.exit(2)
        # Rebinds the module-level constant that unpackz reads
        BUFSIZE = requested
      else:
        print('need a buffer length', file=sys.stderr)
        sys.exit(2)
    if len(sys.argv) < 2:
      # No input file: report it instead of failing with an IndexError
      print('Usage: unpackz.py [-o response-out-file][-b buffer-size] infile',
            file=sys.stderr)
      sys.exit(3)
    unpackz(sys.argv[1], printVal, outfile)
  finally:
    # Close the output file on every exit path, including errors
    if outfile is not None:
      outfile.close()