view bin/by_interval.py @ 221:3d752f01e6fb

minor addition?
author Henry Thompson <ht@markup.co.uk>
date Wed, 28 Feb 2024 10:32:01 +0000
parents d2c4fec1ed21
children
line wrap: on
line source

#!/usr/bin/python3
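'''Split stamped data by time interval, e.g. year or month
Usage: by_interval.py origin-interval output-dir interval-file sorted-ks-file.tsv [field]
The ks-file is tab-separated and is expected to be sorted by stamp.
If field is given, the stamp is in that field, which, as for cut, is 1-origin;
otherwise the stamp is taken from the 4th field.
interval-file holds one numeric boundary per line.  Lines stamped before the
first boundary are written to output-dir/ks_prev.tsv, lines before each later
boundary to output-dir/ks_N.tsv, and the remainder to output-dir/ks_post.tsv.
Interval IDs (N) are assumed to be sequential numbers, starting at origin-interval.
'''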

import sys, os, os.path

USAGE = 'Usage: by_interval.py origin-interval output-dir interval-file sorted-ks-file.tsv [field]'

def split_by_interval(origin, outdir, intv_file, in_file, field=3):
  '''Split the lines of in_file into per-interval files under outdir.

  origin    -- integer ID given to the second output file; later files
               count up from it by one per boundary
  outdir    -- output directory, created (with parents) if missing
  intv_file -- text file with one numeric boundary per line
  in_file   -- tab-separated text file, expected to be sorted by stamp
  field     -- 0-origin index of the column holding the numeric stamp

  Lines whose stamp is below the first boundary go to ks_prev.tsv, lines
  below each subsequent boundary go to ks_<ID>.tsv, and whatever is left
  once the boundaries run out goes to ks_post.tsv (always written, even
  if empty).  Blank lines in intv_file are ignored.

  Raises ValueError if a boundary or stamp is not a number, and
  IndexError if a data line has too few columns.
  '''
  # makedirs with exist_ok avoids the check-then-create race and also
  # copes with a missing parent directory
  os.makedirs(outdir, exist_ok=True)
  suffix = 'prev'      # name of the bucket below the first boundary
  next_id = origin
  with open(intv_file,'r') as intvs, open(in_file,'r') as stamped:
    line = stamped.readline()
    for raw in intvs:
      if not raw.strip():
        # a blank (typically trailing) line is not a boundary
        continue
      bound = float(raw)
      with open(os.path.join(outdir,'ks_%s.tsv'%suffix),'w') as out:
        # input is sorted, so stop at the first stamp that reaches the
        # boundary; that line is carried over to the next bucket
        while line and float(line.split('\t')[field]) < bound:
          out.write(line)
          line = stamped.readline()
      suffix = str(next_id)
      next_id += 1
    with open(os.path.join(outdir,'ks_post.tsv'),'w') as out:
      if line:
        # the pending line plus everything not yet read
        out.write(line)
        out.writelines(stamped)

def main(argv):
  '''Command-line entry point; argv is in the form of sys.argv.

  Exits with the usage line on stderr if the argument count is wrong.
  '''
  args = argv[1:]
  if len(args) not in (4, 5):
    sys.exit(USAGE)
  (origin, outdir, intv_file, in_file) = args[:4]
  # the command line uses cut-style 1-origin fields; default is the 4th
  field = int(args[4])-1 if len(args) == 5 else 3
  split_by_interval(int(origin), outdir, intv_file, in_file, field)

if __name__ == '__main__':
  main(sys.argv)