view python/cluster.py @ 4:56508a6033a9

minutor chunk hacking
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 25 May 2021 14:01:26 -0400
parents
children 672621ab4db4
line wrap: on
line source

#!/usr/bin/python3
'''Read a minutor block location tsv and resort it to show clusters'''
import sys

from util import *

if len(sys.argv)==1 or sys.argv[1]=='-h':
  print("""Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
           n is cluster diameter, default is 5
           default outfile is [infile]_c[n].tsv""")
  exit(1)
if sys.argv[1]=='-c':
  sys.argv.pop(1)
  n=float(sys.argv.pop(1))
else:
  n=5.0

infile_name=sys.argv.pop(1)
if len(sys.argv)>1:
  outfile_name=sys.argv.pop(1)
else:
  outfile_name="%s_c%s.tsv"%(infile_name.split('.')[0],n)

cc=[]

with open(infile_name,'r') as infile:
  with open(outfile_name,'w') as outfile:
    l=infile.readline().rstrip()
    print(l,file=outfile)
    ff=PPAT.split(l)
    (nr,ox,oy,oz)=intsMaybe(ff)
    et=ff[9]
    l=infile.readline().rstrip()
    print(l,file=outfile)
    (orad,ymin,ymax)=intsMaybe(PPAT.split(l))
    print(nr,ox,oy,oz,et,orad,ymin,ymax)
    _=infile.readline()
    for l in infile:
      found=False
      q=[float(i) for i in l.split('\t')[2].split(',')]
      for c in cc:
        for p in c:
          if d(p,q)<=n:
            c.append(q)
            found=True
            break
        if found:
          break
      if not found:
        cc.append([q])
    oc=cc
    cc=[] # lose
    w=0
    ow=-1
    nc=[] # win
    while True:
      for i,c in enumerate(oc):
        win=False
        for p in c:
          for g in oc[i+1:]:
            for q in g:
              if d(p,q)<=n:
                win=True
                w+=1
                nc.append(c+g)
                break
            if win:
              break
          if win:
            break
        if not win:
          cc.append(c)
      print(len(cc),len(nc),ow,w,file=sys.stderr)
      if ow==w:
        break
      ow=w
      oc=nc
      nc=[]
    for c in sorted(cc,reverse=True,key=lambda x:len(x)):
      print(c,file=outfile)