annotate cluster.py @ 50:d335ca1fb71b

merge stale
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Sun, 31 Jul 2022 19:07:39 +0100
parents 668d788cac47
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
1 #!/usr/bin/python3
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
2 '''Read a minutor block lockation tsv and resort it to show clusters'''
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
3 import sys
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
4
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
5 from util import *
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
6
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
7 if len(sys.argv)==1 or sys.argv[1]=='-h':
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
8 print("""Usage: cluster.py [-h] [-c n] [-s] infile.tsv [outfile.tsv]
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
9 n is cluster diameter, default is 5
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
10 -s for strict circle distance, reduces number of candidates
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
11 default outfile is [infile]_c[n].tsv""")
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
12 exit(1)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
13 if sys.argv[1]=='-c':
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
14 sys.argv.pop(1)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
15 n=float(sys.argv.pop(1))
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
16 else:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
17 n=5.0
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
18
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
19 if sys.argv[1]=='-s':
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
20 sys.argv.pop(1)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
21 strict=True
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
22 else:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
23 strict=False
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
24
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
25 infile_name=sys.argv.pop(1)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
26 if len(sys.argv)>1:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
27 outfile_name=sys.argv.pop(1)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
28 else:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
29 outfile_name="%s_c%s.tsv"%(infile_name.split('.')[0],n)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
30
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
31 cc=[]
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
32
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
33 with open(infile_name,'r') as infile:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
34 with open(outfile_name,'w') as outfile:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
35 l=infile.readline().rstrip()
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
36 print(l,file=outfile)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
37 ff=PPAT.split(l)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
38 (nr,ox,oy,oz)=intsMaybe(ff)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
39 home=(float(ox),float(oy),float(oz))
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
40 et=ff[9]
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
41 l=infile.readline().rstrip()
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
42 print(l,file=outfile)
34
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
43 l2=PPAT.split(l)
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
44 if l2[-1]=='bounded':
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
45 orad=int(l2[1])
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
46 ymin=ymax=-1
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
47 else:
668d788cac47 handle unbounded Y, landing below target elevation
Henry Thompson <ht@markup.co.uk>
parents: 23
diff changeset
48 (orad,ymin,ymax)=intsMaybe(l2)
23
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
49 _=infile.readline()
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
50 for l in infile:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
51 found=False
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
52 (_,td,qd)=l.rstrip().split('\t')
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
53 q=[float(i) for i in qd.split(',')]
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
54 td=float(td)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
55 if strict and td>orad:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
56 # Enforce a circular region, not square
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
57 print(td,orad,file=sys.stderr)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
58 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
59 for c in cc:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
60 for p in c:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
61 if d(p,q)<=n:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
62 c.append(q)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
63 found=True
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
64 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
65 if found:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
66 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
67 if not found:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
68 cc.append([q])
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
69 oc=cc
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
70 cc=[] # lose
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
71 w=0
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
72 ow=-1
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
73 nc=[] # win
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
74 while True:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
75 for i,c in enumerate(oc):
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
76 win=False
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
77 for p in c:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
78 for g in oc[i+1:]:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
79 for q in g:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
80 if d(p,q)<=n:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
81 win=True
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
82 w+=1
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
83 nc.append(c+g)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
84 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
85 if win:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
86 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
87 if win:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
88 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
89 if not win:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
90 cc.append(c)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
91 print(len(cc),len(nc),ow,w,file=sys.stderr)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
92 if ow==w:
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
93 break
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
94 ow=w
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
95 oc=nc
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
96 nc=[]
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
97 for c in sorted(cc,reverse=True,key=lambda x:len(x)):
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
98 print(c,file=outfile)
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
99
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
100
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
101
1670a33e3e6d from markup
Henry Thompson <ht@markup.co.uk>
parents:
diff changeset
102