167
|
1 #!/usr/bin/python3
|
|
2 '''Split stamped data by year
|
|
3 Usage: origin-year output-dir by_year.py year-file sorted-ks-file.tsv [field]
|
|
4 If field is given, ks-file is tsv and stamp is in that field,
|
|
5 which, as for cut, is 1-origin'''
|
|
6
|
|
7 import sys, os, os.path
|
|
8
|
|
9 if len(sys.argv)>5:
|
|
10 (origin, outdir, year_file, in_file, field) = sys.argv[1:]
|
|
11 field=int(field)-1
|
|
12 else:
|
|
13 (origin, outdir, year_file, in_file) = sys.argv[1:]
|
|
14 field=3
|
|
15
|
|
16 if not os.access(outdir,os.F_OK):
|
|
17 os.mkdir(outdir)
|
|
18
|
168
|
19 origin = int(origin)
|
167
|
20 current = origin
|
|
21 is_first = True
|
|
22 with open(year_file,'r') as years, open(in_file,'r') as stamped:
|
|
23 y = years.readline()
|
|
24 l = stamped.readline()
|
|
25 while y:
|
|
26 y = float(y)
|
|
27 if is_first:
|
|
28 ysuf = 'prev'
|
|
29 else:
|
|
30 ysuf = str(current-1)
|
|
31 with open(os.path.join(outdir,'ks_%s.tsv'%ysuf),'w') as y_file:
|
|
32 while l:
|
|
33 if float(l.split('\t')[field]) < y:
|
|
34 y_file.write(l)
|
|
35 else:
|
|
36 break
|
|
37 l = stamped.readline()
|
|
38 y = years.readline()
|
|
39 current += 1
|
|
40 is_first = False
|
|
41 else:
|
|
42 with open(os.path.join(outdir,'ks_post.tsv'),'w') as y_file:
|
|
43 while l:
|
|
44 y_file.write(l)
|
|
45 l = stamped.readline()
|