comparison bin/per_segment.py @ 23:e82a82ea3704

sic
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 21 Oct 2022 18:09:53 +0100
parents
children
comparison
equal deleted inserted replaced
22:38bab758e469 23:e82a82ea3704
#!/usr/bin/python3
'''Refactor a per-cdx count table to be per-segment.

Input on STDIN, one tab-separated row per line.
Usage: per_segment segment-column
Assumes column 0 is empty, count is in column 1
Segment column is 0-origin and must be 2 or greater (columns 0 and 1
are taken by the empty column and the count); it may be the last column.

Output: files s0.tsv ... s99.tsv in the current directory, one per
segment.  Each row is the summed count, a tab, and the remaining
columns (without the empty column, the count and the segment), sorted
by descending count.  A file is written for every segment, even if no
input row mentioned it.
'''

import sys

# Number of segments; segment ids are expected to be 0 .. N_SEGMENTS-1.
N_SEGMENTS = 100

USAGE = 'Usage: per_segment segment-column'


def parse_row(line, column, n_segments=N_SEGMENTS):
    '''Split one input row into (segment, count, key).

    line: one raw input line, with or without its trailing newline.
    column: 0-origin index of the segment column.
    n_segments: segment ids must satisfy 0 <= id < n_segments.

    Returns a tuple (segment, count, key) where key is the remaining
    columns re-joined with tabs, minus the initial empty column.

    Raises IndexError if the row has too few columns, ValueError if the
    segment or count is not an integer or the segment is out of range.
    '''
    # Drop the line terminator first so that it never ends up inside the
    # key: this is what makes a last-column segment work, and keeps a
    # final unterminated line from being tallied under a different key.
    fields = line.rstrip('\n').split('\t')
    # Pop the segment first: column > 1, so this leaves index 1 in place.
    segment = int(fields.pop(column))
    count = int(fields.pop(1))
    if not 0 <= segment < n_segments:
        # A negative id would otherwise silently index from the end.
        raise ValueError('segment %s out of range 0..%s'
                         % (segment, n_segments - 1))
    return segment, count, '\t'.join(fields[1:])


def format_table(table):
    '''Return the output rows for one segment.

    table: dict mapping key -> summed count.

    Returns a list of 'count<TAB>key<NEWLINE>' strings ordered by
    descending count; sorted() is stable, so ties keep the order in
    which the keys were first seen.
    '''
    ranked = sorted(table.items(), key=lambda pair: pair[1], reverse=True)
    return ['%s\t%s\n' % (count, key) for (key, count) in ranked]


def main(argv=None):
    '''Read the count table from sys.stdin and write the per-segment files.

    argv: argument vector (defaults to sys.argv); argv[1] is the
    segment column.

    Exits with status 1 on a missing/invalid argument or on the first
    row that cannot be parsed; in the latter case the offending line
    and its fields are echoed to stdout and the reason to stderr.
    '''
    if argv is None:
        argv = sys.argv
    try:
        column = int(argv[1])
    except (IndexError, ValueError):
        sys.exit(USAGE)
    if column < 2:
        # Columns 0 and 1 are the empty column and the count; a negative
        # index would also be shifted once the segment has been popped.
        sys.exit(USAGE)

    tables = [dict() for _ in range(N_SEGMENTS)]
    for line in sys.stdin:
        try:
            (segment, count, key) = parse_row(line, column)
        except (ValueError, IndexError) as err:
            sys.stdout.write(line)
            print(line.split('\t'))
            print('per_segment: %s' % err, file=sys.stderr)
            sys.exit(1)
        table = tables[segment]
        table[key] = table.get(key, 0) + count

    for (segment, table) in enumerate(tables):
        with open('s%s.tsv' % segment, 'w') as out:
            out.writelines(format_table(table))


if __name__ == '__main__':
    main()