Mercurial > hg > cc > cirrus_work
comparison bin/per_segment.py @ 23:e82a82ea3704
sic
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 21 Oct 2022 18:09:53 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
22:38bab758e469 | 23:e82a82ea3704 |
---|---|
#!/usr/bin/python3
'''Refactor a per-cdx count table to be per-segment.

Input on STDIN, one tab-separated row per line.
Usage: per_segment segment-column
Assumes column 0 is empty, count is in column 1
Segment column is 0-origin and is expected to come after the count column

Writes one file per segment, s0.tsv ... s99.tsv, each holding
count<TAB>remaining-columns rows in descending count order.
'''

import os
import sys

# Number of segments, and hence of per-segment tables and output files
N_SEGMENTS = 100


def parse_line(line, c):
    '''Split one input row into (segment, count, key).

    line -- a tab-separated row, with or without its trailing newline
    c    -- 0-origin index of the segment column

    The key is what is left of the row once the segment column, the count
    column (1) and the initial empty column (0) have been removed, re-joined
    with tabs.

    Raises ValueError if segment or count is not an integer, IndexError if
    the row has too few columns.
    '''
    # Strip the newline here rather than relying on the last field to carry
    # it through to the output: that is what used to break when the segment
    # column was the last one.
    fields = line.rstrip('\n').split('\t')
    # The segment is popped first, so column 1 is still the count afterwards
    # as long as c > 1.
    segment = int(fields.pop(c))
    count = int(fields.pop(1))
    return segment, count, '\t'.join(fields[1:])


def tabulate(lines, c, n_segments=N_SEGMENTS):
    '''Sum the counts of identical keys, separately for each segment.

    lines      -- iterable of input rows
    c          -- 0-origin index of the segment column
    n_segments -- number of segments; valid segment numbers are
                  0 .. n_segments-1

    Returns a list of n_segments dicts mapping key to total count.

    On a malformed row (non-integer segment or count, too few columns,
    segment number out of range) the row and its fields are echoed to
    stdout and the program exits with status 1.
    '''
    tables = [dict() for _ in range(n_segments)]
    for line in lines:
        try:
            segment, count, key = parse_line(line, c)
            # Reject negative numbers explicitly: list indexing would
            # otherwise silently count them from the end.
            if not 0 <= segment < n_segments:
                raise ValueError('segment out of range: %s' % segment)
        except (ValueError, IndexError):
            sys.stdout.write(line)
            print(line.split('\t'))
            sys.exit(1)
        table = tables[segment]
        table[key] = table.get(key, 0) + count
    return tables


def write_segments(tables, out_dir='.'):
    '''Write table number i to s<i>.tsv in out_dir, largest counts first.

    Every table gets a file, even an empty one.
    '''
    for segment, table in enumerate(tables):
        with open(os.path.join(out_dir, 's%s.tsv' % segment), 'w') as f:
            for key, count in sorted(table.items(),
                                     key=lambda p: p[1], reverse=True):
                f.write('%s\t%s\n' % (count, key))


def main():
    '''Read the table from stdin and write the per-segment files.'''
    c = int(sys.argv[1])
    write_segments(tabulate(sys.stdin, c))


if __name__ == '__main__':
    main()