diff cluster.py @ 23:1670a33e3e6d

from markup
author Henry Thompson <ht@markup.co.uk>
date Sat, 29 May 2021 11:07:34 +0100
parents
children 668d788cac47
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.py	Sat May 29 11:07:34 2021 +0100
@@ -0,0 +1,97 @@
+#!/usr/bin/python3
+'''Read a minutor block lockation tsv and resort it to show clusters'''
+import sys
+
+from util import *
+
+if len(sys.argv)==1 or sys.argv[1]=='-h':
+  print("""Usage: cluster.py [-h] [-c n] [-s] infile.tsv [outfile.tsv]
+           n is cluster diameter, default is 5
+           -s for strict circle distance, reduces number of candidates
+           default outfile is [infile]_c[n].tsv""")
+  exit(1)
+if sys.argv[1]=='-c':
+  sys.argv.pop(1)
+  n=float(sys.argv.pop(1))
+else:
+  n=5.0
+
+if sys.argv[1]=='-s':
+  sys.argv.pop(1)
+  strict=True
+else:
+  strict=False
+
+infile_name=sys.argv.pop(1)
+if len(sys.argv)>1:
+  outfile_name=sys.argv.pop(1)
+else:
+  outfile_name="%s_c%s.tsv"%(infile_name.split('.')[0],n)
+
+cc=[]
+
+with open(infile_name,'r') as infile:
+  with open(outfile_name,'w') as outfile:
+    l=infile.readline().rstrip()
+    print(l,file=outfile)
+    ff=PPAT.split(l)
+    (nr,ox,oy,oz)=intsMaybe(ff)
+    home=(float(ox),float(oy),float(oz))
+    et=ff[9]
+    l=infile.readline().rstrip()
+    print(l,file=outfile)
+    (orad,ymin,ymax)=intsMaybe(PPAT.split(l))
+    _=infile.readline()
+    for l in infile:
+      found=False
+      (_,td,qd)=l.rstrip().split('\t')
+      q=[float(i) for i in qd.split(',')]
+      td=float(td)
+      if strict and td>orad:
+        # Enforce a circular region, not square
+        print(td,orad,file=sys.stderr)
+        break
+      for c in cc:
+        for p in c:
+          if d(p,q)<=n:
+            c.append(q)
+            found=True
+            break
+        if found:
+          break
+      if not found:
+        cc.append([q])
+    oc=cc
+    cc=[] # lose
+    w=0
+    ow=-1
+    nc=[] # win
+    while True:
+      for i,c in enumerate(oc):
+        win=False
+        for p in c:
+          for g in oc[i+1:]:
+            for q in g:
+              if d(p,q)<=n:
+                win=True
+                w+=1
+                nc.append(c+g)
+                break
+            if win:
+              break
+          if win:
+            break
+        if not win:
+          cc.append(c)
+      print(len(cc),len(nc),ow,w,file=sys.stderr)
+      if ow==w:
+        break
+      ow=w
+      oc=nc
+      nc=[]
+    for c in sorted(cc,reverse=True,key=lambda x:len(x)):
+      print(c,file=outfile)
+    
+
+
+