view bin/by_interval.py @ 178:c42a5f7c97c5

renamed to by_interval.py
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 24 Nov 2023 20:40:09 +0000
parents bc66c6098e5e
children d2c4fec1ed21
line wrap: on
line source

#!/usr/bin/python3
'''Split stamped data by time interval, e.g. year or month
Usage: by_interval.py origin-interval output-dir interval-file sorted-ks-file.tsv [field]
If field is given, ks-file is tsv and stamp is in that field,
which, as for cut, is 1-origin; if omitted, field 4 is used.
Interval IDs are assumed to be sequential numbers.
'''

import sys, os, os.path

def split_by_interval(origin, outdir, intv_file, in_file, field=3):
  '''Split the stamped lines of in_file into one file per interval.

  intv_file holds one numeric interval boundary per line; blank lines
  are ignored.  in_file is tab-separated, with a numeric stamp in the
  0-origin column field, and is assumed to be sorted by ascending stamp
  (it is read in a single forward pass).

  Files written into outdir (created, with parents, if missing):
    ks_prev.tsv   lines whose stamp is below the first boundary
    ks_<N>.tsv    lines from one boundary up to, but not including, the
                  next; N starts at origin and goes up by one per boundary
    ks_post.tsv   every line from the last boundary onwards

  Raises ValueError if a boundary or a stamp is not a number, and
  IndexError if a line has too few fields.
  '''
  # exist_ok avoids the check-then-create race and allows nested paths
  os.makedirs(outdir, exist_ok=True)
  suffix = 'prev'
  next_id = origin
  with open(intv_file,'r') as intvs, open(in_file,'r') as stamped:
    l = stamped.readline()
    for y in intvs:
      if not y.strip():
        # e.g. a trailing empty line: not a boundary, uses up no interval ID
        continue
      bound = float(y)
      with open(os.path.join(outdir,'ks_%s.tsv'%suffix),'w') as y_file:
        # l always holds the first line not yet written anywhere, so the
        # line that stops this loop is carried over to the next interval
        while l and float(l.split('\t')[field]) < bound:
          y_file.write(l)
          l = stamped.readline()
      suffix = str(next_id)
      next_id += 1
    # Whatever is left lies at or beyond the last boundary
    with open(os.path.join(outdir,'ks_post.tsv'),'w') as y_file:
      if l:
        y_file.write(l)
        y_file.writelines(stamped)

def main(argv):
  '''Run the split from a command line, argv[0] being the script name.

  Expects origin-interval output-dir interval-file sorted-ks-file.tsv
  and an optional 1-origin field number (default: field 4).  Exits with
  a usage message on stderr if the argument count is wrong.
  '''
  args = argv[1:]
  if len(args) == 5:
    # the command line counts fields from 1, as cut does
    field = int(args.pop())-1
  elif len(args) == 4:
    field = 3
  else:
    sys.exit('Usage: by_interval.py origin-interval output-dir'
             ' interval-file sorted-ks-file.tsv [field]')
  (origin, outdir, intv_file, in_file) = args
  split_by_interval(int(origin), outdir, intv_file, in_file, field)

if __name__ == '__main__':
  main(sys.argv)