comparison python/cluster.py @ 4:56508a6033a9

minutor chunk hacking
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 25 May 2021 14:01:26 -0400
parents
children 672621ab4db4
comparison
equal deleted inserted replaced
3:870e13483642 4:56508a6033a9
#!/usr/bin/python3
'''Read a minutor block location tsv and re-sort it to show clusters'''
import os
import sys

from util import *

# ---- Command-line handling -------------------------------------------------
# Produces three module-level names used by the rest of the script:
#   n            -- cluster diameter (float, default 5.0)
#   infile_name  -- path of the tsv to read
#   outfile_name -- path of the tsv to write
_USAGE = """Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
n is cluster diameter, default is 5
default outfile is [infile]_c[n].tsv"""

# Work on a copy so the interpreter-wide sys.argv is left untouched.
_args = sys.argv[1:]

if not _args or _args[0] == '-h':
    print(_USAGE)
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module and is absent under `python -S` or in frozen builds.
    sys.exit(1)

if _args[0] == '-c':
    if len(_args) < 2:
        # Previously an uncaught IndexError.
        print(_USAGE, file=sys.stderr)
        sys.exit(1)
    try:
        n = float(_args[1])
    except ValueError:
        # Previously an uncaught ValueError traceback.
        print("cluster.py: -c expects a number, got %r" % _args[1],
              file=sys.stderr)
        print(_USAGE, file=sys.stderr)
        sys.exit(1)
    _args = _args[2:]
else:
    n = 5.0

if not _args:
    # "-c n" given but no input file: previously an uncaught IndexError.
    print(_USAGE, file=sys.stderr)
    sys.exit(1)

infile_name = _args[0]
if len(_args) > 1:
    outfile_name = _args[1]
else:
    # Strip only the final extension.  Splitting on the first '.' anywhere in
    # the path turned "./foo.tsv" into "_c5.0.tsv" and "../x/foo.tsv" into
    # "_c5.0.tsv" as well.
    outfile_name = "%s_c%s.tsv" % (os.path.splitext(infile_name)[0], n)

# ---- Clustering ------------------------------------------------------------
# Reads the input tsv, groups its points into clusters such that any two
# clusters in the result have no pair of points within n of each other, and
# writes the clusters to the output file, largest first.
#
# PPAT, intsMaybe and d come from util (not visible here):
#   NOTE(review): PPAT is presumably a compiled regex used to split header
#   lines, intsMaybe presumably extracts the integer fields, and d(p, q)
#   presumably returns the distance between two points -- confirm in util.


def _near(a, b, limit):
    '''Return True if some point of cluster a is within limit of some point of b.'''
    return any(d(p, q) <= limit for p in a for q in b)


with open(infile_name, 'r') as infile, open(outfile_name, 'w') as outfile:
    # First header line: copied to the output and parsed for the summary.
    header = infile.readline().rstrip()
    print(header, file=outfile)
    ff = PPAT.split(header)
    (nr, ox, oy, oz) = intsMaybe(ff)
    et = ff[9]

    # Second header line: copied to the output and parsed as well.
    header = infile.readline().rstrip()
    print(header, file=outfile)
    (orad, ymin, ymax) = intsMaybe(PPAT.split(header))
    print(nr, ox, oy, oz, et, orad, ymin, ymax)

    # Third line is read and dropped; it is not copied to the output.
    infile.readline()

    # Pass 1: greedy assignment.  Each point joins the first existing cluster
    # that already holds a point within n of it, else starts a new cluster.
    clusters = []
    for line in infile:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing one) instead of raising
            # IndexError on the column lookup below.
            continue
        # Third tab-separated column holds the comma-separated coordinates.
        q = [float(v) for v in line.split('\t')[2].split(',')]
        for c in clusters:
            if any(d(p, q) <= n for p in c):
                c.append(q)
                break
        else:
            clusters.append([q])

    # Pass 2: merge clusters that touch, repeating until a full sweep makes no
    # merge.  A cluster absorbed into an earlier one is retired immediately, so
    # no point is ever emitted twice, and every surviving cluster is
    # re-compared on the next sweep, so late-discovered neighbours are not
    # missed.
    total_merges = 0
    while True:
        merges_before = total_merges
        absorbed = set()   # indices already folded into an earlier cluster
        survivors = []
        for i, c in enumerate(clusters):
            if i in absorbed:
                continue
            for j in range(i + 1, len(clusters)):
                if j not in absorbed and _near(c, clusters[j], n):
                    # Build a new list; the originals are left unmodified.
                    c = c + clusters[j]
                    absorbed.add(j)
                    total_merges += 1
            survivors.append(c)
        clusters = survivors
        # Progress diagnostics: clusters left, merges this sweep,
        # merge total before the sweep, merge total after it.
        print(len(clusters), total_merges - merges_before,
              merges_before, total_merges, file=sys.stderr)
        if total_merges == merges_before:
            break

    # Largest clusters first; sorted() is stable, so equal-sized clusters keep
    # their discovery order.
    for c in sorted(clusters, key=len, reverse=True):
        print(c, file=outfile)