diff python/cluster.py @ 4:56508a6033a9

minutor chunk hacking
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 25 May 2021 14:01:26 -0400
parents
children 672621ab4db4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/cluster.py	Tue May 25 14:01:26 2021 -0400
@@ -0,0 +1,87 @@
+#!/usr/bin/python3
+'''Read a minutor block location tsv and resort it to show clusters
+
+Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
+
+The first two lines of the input are echoed to the output and the third
+is skipped.  Every remaining non-blank line contributes one point, read
+from its third tab-separated field as comma-separated numbers.  Points
+are grouped so that a point within distance n (as measured by d, which
+is expected to come from util) of any member of a cluster belongs to
+that cluster.  Clusters are written one per line, largest first.
+'''
+import os
+import sys
+
+from util import * # expected to supply PPAT, intsMaybe and d
+
+_USAGE="""Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
+           n is cluster diameter, default is 5
+           default outfile is [infile]_c[n].tsv"""
+
+def _usage():
+  '''Print the usage message and exit with status 1'''
+  print(_USAGE)
+  sys.exit(1)
+
+def _add_point(clusters,q,n):
+  '''Add point q to clusters and return the updated list of clusters
+
+  Every cluster with a member within n of q is folded into the first
+  such cluster, so a point never appears in more than one cluster.
+  If no cluster is that close, q starts a new one.'''
+  near=[c for c in clusters if any(d(p,q)<=n for p in c)]
+  if not near:
+    clusters.append([q])
+    return clusters
+  home=near[0]
+  home.append(q)
+  if len(near)==1:
+    return clusters
+  # q links several clusters: absorb the others into the first
+  absorbed=set()
+  for c in near[1:]:
+    home.extend(c)
+    absorbed.add(id(c))
+  return [c for c in clusters if id(c) not in absorbed]
+
+args=sys.argv[1:]
+if not args or args[0]=='-h':
+  _usage()
+n=5.0
+if args[0]=='-c':
+  # -c needs a value, and an input file must still follow it
+  if len(args)<3:
+    _usage()
+  n=float(args[1])
+  args=args[2:]
+
+infile_name=args[0]
+if len(args)>1:
+  outfile_name=args[1]
+else:
+  # strip only the final extension, so dots in directory names survive
+  outfile_name="%s_c%s.tsv"%(os.path.splitext(infile_name)[0],n)
+
+cc=[]
+
+with open(infile_name,'r') as infile, open(outfile_name,'w') as outfile:
+  # first header line: echoed, then parsed for the summary printed below
+  l=infile.readline().rstrip()
+  print(l,file=outfile)
+  ff=PPAT.split(l)
+  (nr,ox,oy,oz)=intsMaybe(ff)
+  et=ff[9]
+  # second header line: likewise
+  l=infile.readline().rstrip()
+  print(l,file=outfile)
+  (orad,ymin,ymax)=intsMaybe(PPAT.split(l))
+  print(nr,ox,oy,oz,et,orad,ymin,ymax)
+  _=infile.readline() # third line is read and discarded
+  for l in infile:
+    if not l.strip():
+      continue # tolerate blank lines, e.g. at end of file
+    q=[float(i) for i in l.split('\t')[2].split(',')]
+    cc=_add_point(cc,q,n)
+  for c in sorted(cc,key=len,reverse=True):
+    print(c,file=outfile)