comparison cluster.py @ 23:1670a33e3e6d

from markup
author Henry Thompson <ht@markup.co.uk>
date Sat, 29 May 2021 11:07:34 +0100
parents
children 668d788cac47
comparison
equal deleted inserted replaced
5:bd1db1ed4c25 23:1670a33e3e6d
1 #!/usr/bin/python3
2 '''Read a minutor block lockation tsv and resort it to show clusters'''
3 import sys
4
5 from util import *
6
7 if len(sys.argv)==1 or sys.argv[1]=='-h':
8 print("""Usage: cluster.py [-h] [-c n] [-s] infile.tsv [outfile.tsv]
9 n is cluster diameter, default is 5
10 -s for strict circle distance, reduces number of candidates
11 default outfile is [infile]_c[n].tsv""")
12 exit(1)
13 if sys.argv[1]=='-c':
14 sys.argv.pop(1)
15 n=float(sys.argv.pop(1))
16 else:
17 n=5.0
18
19 if sys.argv[1]=='-s':
20 sys.argv.pop(1)
21 strict=True
22 else:
23 strict=False
24
25 infile_name=sys.argv.pop(1)
26 if len(sys.argv)>1:
27 outfile_name=sys.argv.pop(1)
28 else:
29 outfile_name="%s_c%s.tsv"%(infile_name.split('.')[0],n)
30
31 cc=[]
32
33 with open(infile_name,'r') as infile:
34 with open(outfile_name,'w') as outfile:
35 l=infile.readline().rstrip()
36 print(l,file=outfile)
37 ff=PPAT.split(l)
38 (nr,ox,oy,oz)=intsMaybe(ff)
39 home=(float(ox),float(oy),float(oz))
40 et=ff[9]
41 l=infile.readline().rstrip()
42 print(l,file=outfile)
43 (orad,ymin,ymax)=intsMaybe(PPAT.split(l))
44 _=infile.readline()
45 for l in infile:
46 found=False
47 (_,td,qd)=l.rstrip().split('\t')
48 q=[float(i) for i in qd.split(',')]
49 td=float(td)
50 if strict and td>orad:
51 # Enforce a circular region, not square
52 print(td,orad,file=sys.stderr)
53 break
54 for c in cc:
55 for p in c:
56 if d(p,q)<=n:
57 c.append(q)
58 found=True
59 break
60 if found:
61 break
62 if not found:
63 cc.append([q])
64 oc=cc
65 cc=[] # lose
66 w=0
67 ow=-1
68 nc=[] # win
69 while True:
70 for i,c in enumerate(oc):
71 win=False
72 for p in c:
73 for g in oc[i+1:]:
74 for q in g:
75 if d(p,q)<=n:
76 win=True
77 w+=1
78 nc.append(c+g)
79 break
80 if win:
81 break
82 if win:
83 break
84 if not win:
85 cc.append(c)
86 print(len(cc),len(nc),ow,w,file=sys.stderr)
87 if ow==w:
88 break
89 ow=w
90 oc=nc
91 nc=[]
92 for c in sorted(cc,reverse=True,key=lambda x:len(x)):
93 print(c,file=outfile)
94
95
96
97