Mercurial > hg > cc > cirrus_work
changeset 101:e2e64c3d763e
bug4 fixed, but that created a new, earlier bug
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 11 Sep 2023 22:06:45 +0100 |
parents | 18446a7eeb9e |
children | e606c609f813 |
files | bin/do_idx.sh bin/merge_date.py |
diffstat | 2 files changed, 41 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/do_idx.sh Mon Sep 11 22:06:45 2023 +0100
@@ -0,0 +1,14 @@
+#!/bin/bash
+export res="$1"
+orig="$2"
+merge_date.py -d <(LC_ALL=C sort -m -k1,2 -s $res/ks_[0-9]*.tsv) $orig $res/idx # | \
+exit
+ tee /dev/stderr | \
+ parallel -j 10 'echo {#} {} >$res/merge_{#}.log
+ echo $(date) {#} {}
+ export res
+ split -l 3000 --filter="igzip -c | \
+ tee >(wc -c >> \
+ $res/merge_{#}.log)" \
+ {} > {}.gz && \
+ rm {}'
--- a/bin/merge_date.py Mon Sep 11 12:56:47 2023 +0100
+++ b/bin/merge_date.py Mon Sep 11 22:06:45 2023 +0100
@@ -63,7 +63,7 @@
 time.sleep(0.1) # so it flushes?
 XN=XPATH%FN
 if not os.path.exists(XN):
- return
+ return (None, None)
 XF = igzip.IGzipFile(filename=XN)
 NF = open((NN:=NPATH%FN), 'wb')
 xl = XF.readline()
@@ -110,12 +110,13 @@
 for xp in (xkey, xdate, xurl))))
 messyU, xkey1, xkey2 = keys(xkey)
 if messyD:
+ noMatch = (ddate != xdate or
+ not dkey.startswith(xkey1) or
+ dkey!=xkey1 or
+ durl!=xurl)
 if messyU:
 # better match
- if (ddate != xdate or
- not dkey.startswith(xkey1) or
- dkey!=xkey1 or
- durl!=xurl):
+ if noMatch:
 raise ValueError("Fail: xkey: %s\n"
 " dkey: %s\n"
 " xdate: %s\n"
@@ -128,11 +129,10 @@
 "xl: %s"%(xkey, dkey, xdate, ddate, xurl, durl,
 (b'\n '.join(dfq)).decode('ascii'),
 xkey1, xkey2, FN, XCNT, DCNT, xl))
- messyD = False
 # fall through to the ordinary (non-messy) match case
 else:
- # still looking, save this one
- if DEBUG:
+ # still looking, save if >= date else fall through to write
+ if DEBUG>1:
 print("Diso: xkey: %s\n"
 " dkey: %s\n"
 " xdate: %s\n"
@@ -141,14 +141,28 @@
 " durl: %s\n"
 "xl: %s"%(xkey, dkey, xdate, ddate, xurl, durl, xl),
 file=sys.stderr)
- xq.append(xl)
- if DEBUG>1:
- sys.stderr.write('xpush\n')
- continue
+ if not noMatch:
+ xq.append(xl)
+ if DEBUG>1:
+ sys.stderr.write('xpush\n')
+ continue
+ # else fall through
 else:
 # Not messyD
 if messyU:
- raise ValueError("messyD w/o messyU")
+ raise ValueError("messyU w/o messyD:"
+ "xkey: %s\n"
+ "dkey: %s\n"
+ "xdate: %s\n"
+ "ddate: %s\n"
+ "xurl: %s\n"
+ "durl: %s\n"
+ "dfq: %s\n"
+ "k1, k2: |%s|%s|\n"
+ "FN: %s XCNT: %s DCNT: %s\n"
+ "xl: %s"%(xkey, dkey, xdate, ddate, xurl, durl,
+ (b'\n '.join(dfq)).decode('ascii'),
+ xkey1, xkey2, FN, XCNT, DCNT, xl))
 if (ddate != xdate or
 not dkey.startswith(xkey1) or
 (xkey2 is not None and dkey!=xkey2) or