annotate bin/by_year.py @ 177:a5d54736a77f

renamed from spearman.py
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 24 Nov 2023 20:39:08 +0000
parents f95858689037
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
167
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/usr/bin/python3
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 '''Split stamped data by year
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 Usage: origin-year output-dir by_year.py year-file sorted-ks-file.tsv [field]
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 If field is given, ks-file is tsv and stamp is in that field,
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 which, as for cut, is 1-origin'''
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 import sys, os, os.path
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 if len(sys.argv)>5:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 (origin, outdir, year_file, in_file, field) = sys.argv[1:]
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 field=int(field)-1
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 else:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 (origin, outdir, year_file, in_file) = sys.argv[1:]
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 field=3
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 if not os.access(outdir,os.F_OK):
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 os.mkdir(outdir)
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18
168
f95858689037 fix output year
Henry S. Thompson <ht@inf.ed.ac.uk>
parents: 167
diff changeset
19 origin = int(origin)
167
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 current = origin
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 is_first = True
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22 with open(year_file,'r') as years, open(in_file,'r') as stamped:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 y = years.readline()
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24 l = stamped.readline()
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 while y:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26 y = float(y)
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 if is_first:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28 ysuf = 'prev'
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
29 else:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 ysuf = str(current-1)
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31 with open(os.path.join(outdir,'ks_%s.tsv'%ysuf),'w') as y_file:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
32 while l:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
33 if float(l.split('\t')[field]) < y:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34 y_file.write(l)
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
35 else:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36 break
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
37 l = stamped.readline()
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
38 y = years.readline()
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39 current += 1
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
40 is_first = False
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
41 else:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42 with open(os.path.join(outdir,'ks_post.tsv'),'w') as y_file:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
43 while l:
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
44 y_file.write(l)
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
45 l = stamped.readline()