Mercurial > hg > cc > cirrus_work
view bin/by_interval.py @ 204:81ca65d44241
normalise % counts by non-empty bases only
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 06 Dec 2023 13:33:25 +0000 |
parents | bc66c6098e5e |
children | d2c4fec1ed21 |
line wrap: on
line source
#!/usr/bin/python3
'''Split stamped data by time interval, e.g. year or month

 Usage: by_interval.py origin-interval output-dir interval-file sorted-ks-file.tsv [field]

 interval-file holds one numeric interval boundary per line, in
 increasing order.  sorted-ks-file.tsv is tab-separated and is assumed
 to be sorted by its stamp.

 If field is given, the stamp is in that field, which, as for cut, is
 1-origin.  If it is not given, the stamp is taken from the 4th field.

 Output, all written into output-dir (created if necessary):
   ks_prev.tsv   rows whose stamp is below the first boundary
   ks_N.tsv      rows from one boundary up to (not including) the next,
                 N counting up from origin-interval
   ks_post.tsv   rows at or beyond the last boundary

 Interval IDs are assumed to be sequential numbers.
'''
import os
import os.path
import sys

_USAGE = ('Usage: by_interval.py origin-interval output-dir interval-file'
          ' sorted-ks-file.tsv [field]')


def _copy_rows_below(stamped, pending, out, bound, field):
    '''Copy rows to out for as long as their stamp is below bound.

    pending is the row already read from stamped but not yet written
    ('' once stamped is exhausted).  Returns the first row that was NOT
    written, so the caller can carry it into the next interval.
    '''
    while pending and float(pending.split('\t')[field]) < bound:
        out.write(pending)
        pending = stamped.readline()
    return pending


def split_by_interval(origin, outdir, intv_file, in_file, field=3):
    '''Split the rows of in_file into one file per interval in outdir.

    origin    -- integer ID given to the interval that starts at the
                 first boundary; later intervals count up from it
    outdir    -- directory for the ks_*.tsv files, created if missing
    intv_file -- file with one numeric boundary per line
    in_file   -- tab-separated rows, assumed sorted by stamp
    field     -- 0-origin index of the stamp column

    Raises ValueError if a boundary or stamp is not a number and
    IndexError if a row has too few fields.
    '''
    # exist_ok avoids the check-then-create race and also copes with
    # nested output paths
    os.makedirs(outdir, exist_ok=True)
    suffix = 'prev'          # rows before the first boundary
    next_id = origin
    with open(intv_file, 'r') as intvs, open(in_file, 'r') as stamped:
        pending = stamped.readline()
        for raw_bound in intvs:
            if not raw_bound.strip():
                # tolerate blank lines (typically a trailing one) rather
                # than crashing in float()
                continue
            bound = float(raw_bound)
            # Every interval gets a file, even if no rows fall in it
            with open(os.path.join(outdir, 'ks_%s.tsv' % suffix), 'w') as y_file:
                pending = _copy_rows_below(stamped, pending, y_file,
                                           bound, field)
            suffix = str(next_id)
            next_id += 1
        # Whatever is left lies at or beyond the last boundary
        with open(os.path.join(outdir, 'ks_post.tsv'), 'w') as y_file:
            y_file.write(pending)
            y_file.writelines(stamped)


def main(argv=None):
    '''Command-line entry point; returns the process exit status.

    argv defaults to sys.argv[1:].  Returns 2, after printing a usage
    message to stderr, if the number of arguments is wrong.
    '''
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) not in (4, 5):
        print(_USAGE, file=sys.stderr)
        return 2
    origin, outdir, intv_file, in_file = args[:4]
    # field is 1-origin on the command line, 0-origin internally
    field = int(args[4]) - 1 if len(args) == 5 else 3
    split_by_interval(int(origin), outdir, intv_file, in_file, field)
    return 0


if __name__ == '__main__':
    sys.exit(main())