changeset 172:bc66c6098e5e

should work for months also now
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 15 Nov 2023 10:24:32 +0000
parents 143d2c6d56da
children 10c87f5c704d
files bin/by_interval.py
diffstat 1 files changed, 46 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/by_interval.py	Wed Nov 15 10:24:32 2023 +0000
@@ -0,0 +1,46 @@
+#!/usr/bin/python3
+'''Split stamped data by time interval, e.g. year or month
+Usage: origin-interval output-dir by_interval.py interval-file sorted-ks-file.tsv [field]
+If field is given, ks-file is tsv and stamp is in that field,
+which, as for cut, is 1-origin.  Interval IDs are assumed to be sequential numbers.
+'''
+
+import sys, os, os.path
+
+if len(sys.argv)>5:
+  (origin, outdir, intv_file, in_file, field) = sys.argv[1:]
+  field=int(field)-1
+else:
+  (origin, outdir, intv_file, in_file) = sys.argv[1:]
+  field=3
+
+if not os.access(outdir,os.F_OK):
+  os.mkdir(outdir)
+
+origin = int(origin)
+current = origin
+is_first = True
+with open(intv_file,'r') as intvs, open(in_file,'r') as stamped:
+  y = intvs.readline()
+  l = stamped.readline()
+  while y:
+    y = float(y)
+    if is_first:
+      ysuf = 'prev'
+    else:
+      ysuf = str(current-1)
+    with open(os.path.join(outdir,'ks_%s.tsv'%ysuf),'w') as y_file:
+      while l:
+        if float(l.split('\t')[field]) < y:
+          y_file.write(l)
+        else:
+          break
+        l = stamped.readline()
+    y = intvs.readline()
+    current += 1
+    is_first = False
+  else:
+    with open(os.path.join(outdir,'ks_post.tsv'),'w') as y_file:
+      while l:
+        y_file.write(l)
+        l = stamped.readline()