changeset 243:7bef91ca3d51

make into a library, entry point def unpackz(infileName, callback, outfile = None), moved to python/lib/cc
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 02 Oct 2024 19:54:45 +0100
parents e117424e244a
children ce5b2c1da222
files lib/python/cc/unpackz.py lib/python/unpackz.py
diffstat 2 files changed, 75 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/unpackz.py	Wed Oct 02 19:54:45 2024 +0100
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+'''See https://stackoverflow.com/a/37042747/2595465
+Usage: unpackz.py [-o response-out-file] [-b buffer-size] infile
+'''
+import sys
+import isal.isal_zlib
+BUFSIZE = 1048576
+
+def unpackz(infileName, callback, outfile = None):
+  '''Split a multi-member gzip file into its members.
+
+  Reads infileName in chunks of BUFSIZE bytes and, for each gzip member
+  found, calls callback(length, offset, data, outfile), where offset is
+  the position of the member's first byte in the file, length is the
+  number of compressed bytes it occupies, data is its complete
+  decompressed content and outfile is passed through untouched.
+  An empty input file produces a single call with length 0 and empty data.
+  '''
+  offset = 0     # file position where the current member starts
+  obuf_len = 0   # compressed bytes of the current member seen before buf
+  nbuf = lastbuf = False  # nbuf: buf came straight from f.read()
+                          # lastbuf: that read hit end of file
+  chunks = []    # decompressed pieces of the current member
+  with open(infileName,'rb') as f:
+    z = isal.isal_zlib.decompressobj(31)  # wbits 31: gzip header and trailer
+    while True:
+      if z.unused_data == b"":
+        # All of buf has been consumed by the current member
+        if lastbuf:
+          # Nothing left to read: report the final member.  If buf was a
+          # whole read (not a left-over tail) it has not been counted yet.
+          callback(obuf_len + (truesize if nbuf else 0), offset,
+                   b"".join(chunks), outfile)
+          break
+        if nbuf:
+          obuf_len += BUFSIZE # still no EOS after a full buffer processed
+        buf = f.read(BUFSIZE)
+        nbuf = True
+        lastbuf = ((truesize:=len(buf)) < BUFSIZE) # will only succeed if now at EOF
+      else:
+        # The member ended inside buf; what follows it is in unused_data.
+        # obuf_len only applies when buf was a whole read: a left-over tail
+        # starts at the member's first byte.
+        count = (obuf_len if nbuf else 0) + (len(buf)-len(z.unused_data))
+        callback(count, offset, b"".join(chunks), outfile)
+        offset += count
+        chunks = []
+        buf = z.unused_data
+        obuf_len = len(buf)
+        nbuf = False
+        z = isal.isal_zlib.decompressobj(31)
+      # Keep every piece: a member may span several buffers
+      chunks.append(z.decompress(buf))
+
+def printVal(count,offset,data,outfile):
+  '''Callback used by the command line to report one member.
+
+  With no outfile, print count and offset on one line.  Otherwise write
+  NUL count NUL offset NUL (decimal digits) to outfile, followed by data.
+  '''
+  if outfile is not None:
+    header = b'\000%d\000%d\000'%(count, offset)
+    outfile.write(header)
+    outfile.write(data)
+    return
+  print(count, offset)
+
+if __name__ == '__main__':
+  # Command line: unpackz.py [-o response-out-file] [-b buffer-size] infile
+  # -o and -b, if present, must come in that order before the input file.
+  outfile = None
+  try:
+    if len(sys.argv)>1 and sys.argv[1] == '-o':
+      sys.argv.pop(1)
+      if len(sys.argv)>=3:
+        outfile = open(sys.argv.pop(1),'wb')
+      else:
+        print('need an outfile', file=sys.stderr)
+        sys.exit(1)
+    if len(sys.argv)>1 and sys.argv[1] == '-b':
+      sys.argv.pop(1)
+      if len(sys.argv)==3:
+        # Rebinds the module-level BUFSIZE, which unpackz reads when called
+        BUFSIZE = int(sys.argv.pop(1))
+      else:
+        print('need a buffer length', file=sys.stderr)
+        sys.exit(2)
+    if len(sys.argv)<2:
+      # Previously this case died with an IndexError traceback
+      print('need an input file', file=sys.stderr)
+      sys.exit(3)
+    unpackz(sys.argv[1], printVal, outfile)
+  finally:
+    # Close the output even if option handling or decompression fails
+    if outfile is not None:
+      outfile.close()
+
+
--- a/lib/python/unpackz.py	Wed Oct 02 11:09:58 2024 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-#!/usr/bin/env python3
-'''See https://stackoverflow.com/a/37042747/2595465
-Usage: unpackz.py [-o response-out-file][-b buffer-size]
-'''
-import sys
-import isal.isal_zlib
-offset = 0
-obuf_len = 0
-nbuf = lastbuf = False
-BUFSIZE = 1048576
-outfile = None
-if sys.argv[1] == '-o':
-  sys.argv.pop(1)
-  if len(sys.argv)>=3:
-    outfile = open(sys.argv.pop(1),'wb')
-  else:
-    print('need an outfile', file=sys.stderr)
-    exit(1)
-if sys.argv[1] == '-b':
-  sys.argv.pop(1)
-  if len(sys.argv)==3:
-    BUFSIZE = int(sys.argv.pop(1))
-  else:
-    print('need a buffer length', file=sys.stderr)
-    exit(2)
-
-with open(sys.argv[1],'rb') as f:
-  z = isal.isal_zlib.decompressobj(31)
-  count = 0
-  while True:
-    if z.unused_data == b"": 
-        #print('n', obuf_len, file=sys.stderr)
-      if lastbuf:  # buf == b"":
-        if outfile is None:
-          print(obuf_len, offset)
-        else:
-          outfile.write(b'\000%d\000%d\000'%(obuf_len, offset))
-        if count!=0:
-          print("Unused data: count=%s offset=%s ?"%(count, offset),
-                file=sys.stderr)
-        break
-      if nbuf:
-        obuf_len += BUFSIZE # still no EOS after a full buffer processed
-      buf = f.read(BUFSIZE)
-      nbuf = True
-      lastbuf = ((truesize:=len(buf)) < BUFSIZE) # will only succeed if now at EOF
-    else:
-      buf_len = len(buf)
-      #print('b', obuf_len, buf_len, len(z.unused_data), len(buf)-len(z.unused_data),
-      #      nbuf, lastbuf, file=sys.stderr)
-      count = (obuf_len if (buf_len == truesize) else 0) + \
-              (len(buf)-len(z.unused_data))
-      if outfile is None:
-        print(count, offset)
-      else:
-        outfile.write(b'\000%d\000%d\000'%(count, offset))
-      #if (offset == 1352249):
-      #  breakpoint()
-      offset += count
-      count = 0
-      buf = z.unused_data
-      obuf_len = len(buf)
-      nbuf = False
-      z = isal.isal_zlib.decompressobj(31)
-    got = z.decompress(buf)
-    if outfile is not None:
-      outfile.write(got)
-if outfile is not None:
-  outfile.close()