Mercurial > hg > cc > cirrus_work
comparison bin/do_idx.sh @ 105:9403c02d5034
switch to gzip -7 to get comparable compressed cdx block size
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 13 Sep 2023 16:48:43 +0100 |
parents | 7d58dc01f329 |
children | a0ea1e4a714d |
comparison
equal
deleted
inserted
replaced
104:fc9a045c872b | 105:9403c02d5034 |
---|---|
1 #!/bin/bash | 1 #!/bin/bash |
2 export res="$1" | 2 export res="$1" |
3 orig="$2" | 3 orig="$2" |
| | 4 # igzip was faster, but produced bigger files, so went to gzip one step |
| | 5 # smaller than default (-6), which produces slightly _smaller_ blocks. |
4 merge_date.py <(LC_ALL=C sort -m -k1,2 -s $res/ks_[0-9]*.tsv) $orig $res/idx |\ | 6 merge_date.py <(LC_ALL=C sort -m -k1,2 -s $res/ks_[0-9]*.tsv) $orig $res/idx |\ |
5 parallel -j 10 'echo {#} {} >$res/merge_{#}.log | 7 parallel -j 10 'echo {#} {} >$res/merge_{#}.log |
6 echo $(date) {#} {} | 8 echo $(date) {#} {} |
7 export res | 9 export res |
8 split -l 3000 --filter="igzip -c | \ | 10 split -l 3000 --filter="gzip -c -7 --keep | \ |
9 tee >(wc -c >> \ | 11 tee >(wc -c >> \ |
10 $res/merge_{#}.log)" \ | 12 $res/merge_{#}.log)" \ |
11 {} > {}.gz && \ | 13 {} > {}.gz && \ |
12 rm {}' | 14 rm {}' |