#!/usr/bin/python3
'''Read a minutor block location tsv and re-sort it to show clusters.

The first two lines of the input are header lines; they are copied to the
output unchanged.  The third line is read and discarded.  Every remaining
line must have a comma-separated coordinate list in its third tab-separated
column.  Coordinates are grouped into clusters: two points share a cluster
when they are linked by a chain of points in which each consecutive pair
satisfies d(p, q) <= n.  Clusters are written to the output, largest first,
one Python list per line.
'''
import os
import sys

# Names used from util: PPAT, intsMaybe and d.
from util import *

USAGE = """Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
n is cluster diameter, default is 5
default outfile is [infile]_c[n].tsv"""

DEFAULT_DIAMETER = 5.0


def _usage_exit():
    '''Print the usage text and terminate with exit status 1.'''
    print(USAGE)
    sys.exit(1)


def _default_outfile_name(infile_name, n):
    '''Return "<infile stem>_c<n>.tsv", placed next to the input file.

    Only the file-name component is cut at its first dot, so dots in the
    directory part (e.g. "./blocks.tsv") no longer destroy the name.
    '''
    head, tail = os.path.split(infile_name)
    # Fall back to the full name if it starts with a dot (empty stem).
    stem = tail.split('.')[0] or tail
    return "%s_c%s.tsv" % (os.path.join(head, stem), n)


def _parse_args(argv):
    '''Return (n, infile_name, outfile_name) parsed from argv.

    Prints the usage text and exits with status 1 when no arguments are
    given, when the first argument is -h, when -c lacks a numeric value, or
    when the input file name is missing.
    '''
    args = list(argv[1:])
    if not args or args[0] == '-h':
        _usage_exit()

    n = DEFAULT_DIAMETER
    if args[0] == '-c':
        if len(args) < 2:
            _usage_exit()
        try:
            n = float(args[1])
        except ValueError:
            _usage_exit()
        args = args[2:]

    if not args:
        _usage_exit()
    infile_name = args[0]
    if len(args) > 1:
        outfile_name = args[1]
    else:
        outfile_name = _default_outfile_name(infile_name, n)
    return n, infile_name, outfile_name


def _cluster_points(points, n):
    '''Group points into clusters linked by d(p, q) <= n.

    Each new point is compared with every existing cluster.  All clusters
    that contain a point within n of it are merged into one, and the new
    point is added to that cluster.  Every point therefore ends up in
    exactly one cluster, and no pair of clusters within n of each other is
    left unmerged.

    NOTE(review): d comes from util and is presumably a symmetric distance
    between two coordinate lists -- confirm.
    '''
    clusters = []
    for q in points:
        home = None   # first existing cluster that q is close to
        kept = []     # clusters that survive this step, in original order
        for c in clusters:
            if any(d(p, q) <= n for p in c):
                if home is None:
                    home = c
                    kept.append(c)
                else:
                    # q bridges two clusters: fold this one into the first.
                    home.extend(c)
            else:
                kept.append(c)
        if home is None:
            kept.append([q])
        else:
            home.append(q)
        clusters = kept
    return clusters


def main(argv):
    '''Run the clustering described in the module docstring.'''
    n, infile_name, outfile_name = _parse_args(argv)

    with open(infile_name, 'r') as infile, \
            open(outfile_name, 'w') as outfile:
        # First header line: copied through, then parsed for the summary.
        line = infile.readline().rstrip()
        print(line, file=outfile)
        ff = PPAT.split(line)
        (nr, ox, oy, oz) = intsMaybe(ff)
        et = ff[9]

        # Second header line: copied through, then parsed for the summary.
        line = infile.readline().rstrip()
        print(line, file=outfile)
        (orad, ymin, ymax) = intsMaybe(PPAT.split(line))
        print(nr, ox, oy, oz, et, orad, ymin, ymax)

        # Third line is skipped and not copied to the output
        # (presumably the column-header row -- confirm).
        infile.readline()

        points = []
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at end of file
            points.append(
                [float(i) for i in line.split('\t')[2].split(',')])

        clusters = _cluster_points(points, n)
        # Progress summary on stderr: number of points and of clusters.
        print(len(points), len(clusters), file=sys.stderr)

        # sorted() is stable, so equally sized clusters keep creation order.
        for c in sorted(clusters, key=len, reverse=True):
            print(c, file=outfile)


if __name__ == '__main__':
    main(sys.argv)