changeset 239:992f59d21832

working, but last count/offset not being written
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sat, 28 Sep 2024 15:19:05 +0100
parents a4538bdfa92a
children 51bd09d4289e
files lib/python/unpackz.py
diffstat 1 files changed, 57 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/unpackz.py	Sat Sep 28 15:19:05 2024 +0100
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+'''List, and with -o unpack, the members of a multi-member gzip file.
+Usage: unpackz.py [-o OUTFILE] INFILE
+Each member's compressed length and start offset go to stdout as "count offset"
+or, with -o, to OUTFILE as a NUL-delimited record after its unpacked data.
+See https://stackoverflow.com/a/37042747/2595465'''
+import sys
+import isal.isal_zlib
+BUFSIZE = 1048576 # bytes requested from INFILE per read
+outfile = None
+if len(sys.argv) > 1 and sys.argv[1] == '-o':
+  sys.argv.pop(1)
+  if len(sys.argv)==3:
+    outfile = open(sys.argv.pop(1),'wb')
+  else:
+    print('need an outfile', file=sys.stderr)
+    sys.exit(1)
+if len(sys.argv) < 2:
+  print('need an infile', file=sys.stderr)
+  sys.exit(1)
+def emit(count, start):
+  '''Report one member (length, start offset): on stdout, or into outfile.'''
+  if outfile is None:
+    print(count, start)
+  else:
+    outfile.write(b'\000%d\000%d\000'%(count, start))
+offset = 0 # where in INFILE the member now being unpacked starts
+pos = 0 # how far into INFILE the decompressors have consumed so far
+try:
+  with open(sys.argv[1],'rb') as f:
+    z = isal.isal_zlib.decompressobj(31) # 31: gzip header+trailer, as for zlib
+    buf = b""
+    while True:
+      if not buf: # nothing left over from the previous member: read more
+        buf = f.read(BUFSIZE)
+        if not buf:
+          break # end of INFILE; every complete member is already reported
+      got = z.decompress(buf)
+      if outfile is not None:
+        outfile.write(got)
+      if z.eof:
+        # Member ended; unused_data is empty at a buffer/file end, so test eof
+        rest = z.unused_data # input belonging to the following member(s)
+        pos += len(buf) - len(rest)
+        emit(pos - offset, offset)
+        offset = pos
+        buf = rest
+        z = isal.isal_zlib.decompressobj(31) # fresh state for the next member
+      else:
+        pos += len(buf) # whole buffer consumed, member continues
+        buf = b""
+finally:
+  if outfile is not None:
+    outfile.close()
+if pos != offset: # input consumed after the last complete member
+  print("Unused data: count=%s offset=%s ?"%(pos - offset, offset),
+        file=sys.stderr)