view bin/do_idx.sh @ 142:0326805aa6df

change path to merge_date.py
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 02 Oct 2023 18:54:10 +0100
parents 5b952d16838c
children 3ba401110c22
line wrap: on
line source

#!/bin/bash
# Merge the sorted ks_*.tsv key files of a result directory through
# merge_date.py, then gzip every file that merge_date.py names on its stdout.
#
# Usage: do_idx.sh [-d...] [-m previously-merged-log] result-dir cdx-source-dir
#   -d                passed through to merge_date.py; may be repeated
#   -m LOG            passed through to merge_date.py as "-m LOG"
#   result-dir        existing directory holding the ks_<digit>*.tsv inputs;
#                     merge.log and merge_<n>.log are written here and
#                     result-dir/idx is handed to merge_date.py
#   cdx-source-dir    handed to merge_date.py unchanged
# Exits 2 on a usage error, 1 if result-dir is not a directory.

usage() {
  printf 'Usage: %s [-d...] [-m previously-merged-log] result-dir cdx-source-dir\n' \
    "${0##*/}" >&2
  exit 2
}

# Option words are collected in arrays (initialised empty) so that values
# with whitespace survive intact and nothing leaks in from same-named
# environment variables.
debug=()
merged=()

while [[ "${1-}" == "-d" ]]; do
  shift
  debug+=(-d)
done

if [[ "${1-}" == "-m" ]]; then
  shift
  # -m needs a value; without this check the following shift would fail
  # silently and an empty log name would be passed on.
  (( $# >= 1 )) || usage
  merged=(-m "$1")
  shift
fi

# Both positional arguments are required: an empty result-dir would make
# every path below resolve against the filesystem root.
(( $# >= 2 )) || usage

# Exported because the job script run by parallel, and the filter shell
# started by split inside it, expand $res themselves.
export res="$1"
orig="$2"

if [[ ! -d "$res" ]]; then
  printf '%s: result-dir is not a directory: %s\n' "${0##*/}" "$res" >&2
  exit 1
fi

# Stage 1: sort -m merges the already-sorted key files (keys 1-3, stable,
# byte-wise collation) and feeds them to merge_date.py via process
# substitution; merge_date.py's stderr goes to $res/merge.log.
# Stage 2: each line merge_date.py prints is taken by parallel as a file
# name ({}), with {#} being the job sequence number. Per file, the job
#   - starts $res/merge_{#}.log with "{#} {}",
#   - cuts the file into 3000-line pieces, gzips each piece separately and
#     concatenates the pieces into {}.gz,
#   - appends the compressed byte count of every piece to merge_{#}.log,
#   - removes the uncompressed file if split succeeded.
# igzip was faster but produced bigger files, so gzip is used at -7, one
# level above the default (-6), which gives slightly smaller blocks.
# The filter relies on >(...), so the shell split starts ($SHELL) must
# support process substitution.
# NOTE(review): the wc inside >(...) runs asynchronously; confirm the sizes
# always land in merge_{#}.log in piece order if that order matters.
~/lib/python/cc/lmh/merge_date.py "${debug[@]}" "${merged[@]}" \
    <(LC_ALL=C sort -m -k1,3 -s "$res"/ks_[0-9]*.tsv) \
    "$orig" "$res/idx" 2>"$res/merge.log" |
  parallel -j 10 'echo {#} {} >"$res/merge_{#}.log"
                  echo $(date) {#} {}
                  export res
                  split -l 3000 --filter="gzip -c -7 --keep | \
                                          tee >(wc -c >> \
                                                \"\$res/merge_{#}.log\")" \
                            {} > {}.gz && \
                            rm {}'