comparison bin/by_interval.py @ 172:bc66c6098e5e

should work for months also now
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 15 Nov 2023 10:24:32 +0000
parents bin/by_year.py@f95858689037
children d2c4fec1ed21
comparison
equal deleted inserted replaced
171:143d2c6d56da 172:bc66c6098e5e
#!/usr/bin/python3
'''Split stamped data by time interval, e.g. year or month
Usage: by_interval.py origin-interval output-dir interval-file sorted-ks-file.tsv [field]
 interval-file holds one numeric boundary per line, in increasing order.
 sorted-ks-file.tsv is tab-separated and sorted by its stamp field.
 If field is given, the stamp is in that field, which, as for cut,
 is 1-origin; otherwise the stamp is taken from the 4th field.
 Interval IDs are assumed to be sequential numbers.

 Output files, all written into output-dir:
  ks_prev.tsv  rows whose stamp is below the first boundary
  ks_N.tsv     rows between consecutive boundaries, N counting up from
               origin-interval
  ks_post.tsv  rows whose stamp is at or above the last boundary
'''

import sys
import os
import os.path


def split_by_interval(origin, outdir, intv_file, in_file, field=3):
    '''Distribute the rows of in_file over per-interval files in outdir.

    origin    -- ID (int, or string convertible to int) of the interval
                 that starts at the first boundary in intv_file
    outdir    -- directory for the ks_*.tsv outputs; created if missing
    intv_file -- path of a file with one numeric boundary per line
    in_file   -- path of the tab-separated input, sorted by stamp
    field     -- 0-origin index of the stamp field in each input row

    Raises ValueError if a boundary or stamp is not numeric, and
    IndexError if an input row has too few fields.
    '''
    # exist_ok avoids the check-then-create race and also lets nested
    # output paths be created in one go
    os.makedirs(outdir, exist_ok=True)
    suffix = 'prev'  # everything before the first boundary
    next_id = int(origin)
    with open(intv_file, 'r') as intvs, open(in_file, 'r') as stamped:
        row = stamped.readline()
        for bound_text in intvs:
            if not bound_text.strip():
                # Tolerate blank lines (e.g. a trailing one): they are not
                # boundaries and must not consume an interval ID
                continue
            bound = float(bound_text)
            with open(os.path.join(outdir, 'ks_%s.tsv' % suffix), 'w') as out:
                # The input is sorted, so the first row at or past the
                # boundary ends this interval; that row is kept in hand
                # for the next output file
                while row and float(row.split('\t')[field]) < bound:
                    out.write(row)
                    row = stamped.readline()
            suffix = str(next_id)
            next_id += 1
        # Whatever is left lies at or beyond the last boundary; this file
        # is always written, even if empty
        with open(os.path.join(outdir, 'ks_post.tsv'), 'w') as out:
            while row:
                out.write(row)
                row = stamped.readline()


def main(argv):
    '''Parse the command line in argv (argv[0] is the script name) and run.'''
    args = argv[1:]
    if len(args) not in (4, 5):
        # Exit with the usage text rather than a tuple-unpacking traceback
        sys.exit(__doc__)
    origin, outdir, intv_file, in_file = args[:4]
    # The command line counts fields from 1, as cut does
    field = int(args[4]) - 1 if len(args) == 5 else 3
    split_by_interval(int(origin), outdir, intv_file, in_file, field)


if __name__ == '__main__':
    main(sys.argv)