changeset 105:9403c02d5034

switch to gzip -7 to get comparable compressed cdx block size
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 13 Sep 2023 16:48:43 +0100
parents fc9a045c872b
children 6104acc1345b
files bin/do_idx.sh
diffstat 1 files changed, 3 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/bin/do_idx.sh	Wed Sep 13 12:41:55 2023 +0100
+++ b/bin/do_idx.sh	Wed Sep 13 16:48:43 2023 +0100
@@ -1,11 +1,13 @@
 #!/bin/bash
 export res="$1"
 orig="$2"
+# igzip was faster, but produced bigger files, so switched to gzip -7, one
+# level above the default (-6), which produces slightly _smaller_ blocks.
 merge_date.py <(LC_ALL=C sort -m -k1,2 -s $res/ks_[0-9]*.tsv) $orig $res/idx |\
           parallel -j 10 'echo {#} {} >$res/merge_{#}.log
                           echo $(date) {#} {}
                           export res
-                          split -l 3000 --filter="igzip -c | \
+                          split -l 3000 --filter="gzip -c -7 --keep | \
                                                   tee >(wc -c >> \
                                                         $res/merge_{#}.log)" \
                                     {} > {}.gz && \