Mercurial > hg > cc > cirrus_work
view bin/do_idx.sh @ 143:f63a8477c9df
correct mistaken futnsz test,
add a few more quoting / coding tests
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 02 Oct 2023 18:55:48 +0100 |
parents | 0326805aa6df |
children | 3ba401110c22 |
line wrap: on
line source
#!/bin/bash
# Usage: do_idx.sh [-d...] [-m previously-merged-log] result-dir cdx-source-dir
#
# Feeds a sort-merge of result-dir/ks_[0-9]*.tsv to merge_date.py and then,
# for every line merge_date.py writes to stdout, uses GNU parallel to
# gzip that file in 3000-line pieces into <file>.gz and delete the original.
#
#   -d                          may be repeated; each occurrence is passed
#                               through to merge_date.py
#   -m previously-merged-log    passed through to merge_date.py as "-m <log>"
#   result-dir                  holds the ks_*.tsv inputs; receives merge.log
#                               and merge_<job>.log; result-dir/idx is passed
#                               to merge_date.py
#   cdx-source-dir              passed to merge_date.py unchanged
#
# NOTE(review): the lines merge_date.py prints are presumably the paths of
# the index files it has written -- confirm against merge_date.py.

usage() {
  printf 'Usage: %s [-d...] [-m previously-merged-log] result-dir cdx-source-dir\n' \
    "${0##*/}" >&2
  exit 2
}

# Let a failure of merge_date.py show up in the exit status instead of
# being masked by the exit status of parallel.
set -o pipefail

# Option words are collected in arrays so that each one reaches
# merge_date.py as exactly one argument, and so that nothing inherited
# from the environment in $debug / $merged leaks into the command line.
debug=()
while [ "$1" = "-d" ]; do
  shift
  debug+=(-d)
done

merged=()
if [ "$1" = "-m" ]; then
  if [ "$#" -lt 2 ]; then
    usage
  fi
  merged=(-m "$2")
  shift 2
fi

# Without this check an empty result-dir would make the script write
# /merge.log, /idx and /merge_N.log.
if [ "$#" -lt 2 ] || [ -z "$1" ]; then
  usage
fi

# res is exported because the command string handed to parallel below is
# single-quoted: $res is expanded by the shells parallel starts, not here.
export res="$1"
orig="$2"

# igzip was faster, but produced bigger files, so we use gzip -7, one
# level above the default (-6), which produces slightly _smaller_ blocks.
#
# In the parallel command {} is the input line and {#} the job number.
# Each job writes "{#} {}" to merge_{#}.log, echoes a timestamped line,
# then split pipes every 3000-line piece through gzip; tee copies the
# compressed bytes to wc -c, which appends one byte count per piece to
# merge_{#}.log, while the pieces themselves are concatenated into {}.gz.
# The input file is removed only if split succeeded.
#
# The command string is deliberately left as it was: inside it $res is
# still expanded unquoted, so result-dir must not contain whitespace.
~/lib/python/cc/lmh/merge_date.py "${debug[@]}" "${merged[@]}" \
    <(LC_ALL=C sort -m -k1,3 -s "$res"/ks_[0-9]*.tsv) \
    "$orig" "$res/idx" 2>"$res/merge.log" | \
  parallel -j 10 'echo {#} {} >$res/merge_{#}.log
      echo $(date) {#} {}
      export res
      split -l 3000 --filter="gzip -c -7 --keep | \
                               tee >(wc -c >> \
                                      $res/merge_{#}.log)" \
          {} > {}.gz && \
      rm {}'