Mercurial > hg > lib > markup
view python/cluster.py @ 4:56508a6033a9
minutor chunk hacking
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 25 May 2021 14:01:26 -0400 |
parents | |
children | 672621ab4db4 |
line wrap: on
line source
#!/usr/bin/python3
'''Read a minutor block location tsv and resort it to show clusters

Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]

The first two lines of the input are copied unchanged to the output and the
third line is skipped.  Every following line contributes one point, taken
from the comma-separated numbers in its third tab-separated column.  Points
are grouped so that two points end up in the same cluster whenever they are
linked by a chain of points whose successive distances (as computed by
util.d) are all <= n.  Clusters are written to the output one per line,
largest first.
'''
import os
import sys
from util import *

_USAGE = """Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
  n is cluster diameter, default is 5
  default outfile is [infile]_c[n].tsv"""


def _usage_exit():
    '''Print the usage message and terminate with status 1.'''
    print(_USAGE)
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under python -S)
    sys.exit(1)


def _parse_args(argv):
    '''Return (n, infile_name, outfile_name) from a sys.argv-style list.

    Prints usage and exits with status 1 when no arguments are given, when
    the first argument is -h, or when -c is not followed by both a value
    and an input file name.  A non-numeric value after -c raises ValueError.
    '''
    args = list(argv[1:])
    if not args or args[0] == '-h':
        _usage_exit()
    n = 5.0
    if args[0] == '-c':
        if len(args) < 3:
            _usage_exit()
        n = float(args[1])
        args = args[2:]
    infile_name = args[0]
    if len(args) > 1:
        outfile_name = args[1]
    else:
        # splitext strips only the final extension, so dots in directory
        # names (or a leading "./") no longer truncate the output path
        outfile_name = "%s_c%s.tsv" % (os.path.splitext(infile_name)[0], n)
    return n, infile_name, outfile_name


def _touching(a, b, n):
    '''True if some point of cluster a is within n of some point of b.'''
    # d comes from util via the star import; it is used here as a distance
    # between two coordinate lists -- NOTE(review): confirm against util
    return any(d(p, q) <= n for p in a for q in b)


def _initial_clusters(lines, n):
    '''Greedily assign each data line's point to the first cluster near it.

    A point joins the first existing cluster that already holds a point
    within n of it; otherwise it starts a new cluster.  Blank lines are
    skipped.  The result may still contain clusters that touch each other,
    which _merge_clusters resolves.
    '''
    clusters = []
    for line in lines:
        if not line.strip():
            continue
        q = [float(i) for i in line.split('\t')[2].split(',')]
        for c in clusters:
            if any(d(p, q) <= n for p in c):
                c.append(q)
                break
        else:
            clusters.append([q])
    return clusters


def _merge_clusters(clusters, n):
    '''Merge touching clusters until none of the remaining ones touch.

    Each pass folds every cluster into the first already-kept cluster it
    touches, so a cluster is never carried forward both merged and on its
    own.  Passes repeat until one completes with no merge, because growing
    a cluster can bring it within reach of one that was kept earlier.
    One line per pass (cluster count, merges made) is written to stderr.
    '''
    while True:
        kept = []
        merges = 0
        for c in clusters:
            for k in kept:
                if _touching(k, c, n):
                    k.extend(c)
                    merges += 1
                    break
            else:
                # copy so the caller's lists are not extended in place
                kept.append(list(c))
        print(len(kept), merges, file=sys.stderr)
        clusters = kept
        if merges == 0:
            return clusters


def main():
    '''Run the command-line tool.'''
    n, infile_name, outfile_name = _parse_args(sys.argv)
    with open(infile_name, 'r') as infile, open(outfile_name, 'w') as outfile:
        # First header line: echoed, then split with util.PPAT; intsMaybe
        # must yield exactly four values and field 9 is kept as et.
        # NOTE(review): the meaning of these fields is defined by the
        # minutor export format / util, not visible here
        line = infile.readline().rstrip()
        print(line, file=outfile)
        ff = PPAT.split(line)
        (nr, ox, oy, oz) = intsMaybe(ff)
        et = ff[9]
        # Second header line: echoed, must yield exactly three values
        line = infile.readline().rstrip()
        print(line, file=outfile)
        (orad, ymin, ymax) = intsMaybe(PPAT.split(line))
        print(nr, ox, oy, oz, et, orad, ymin, ymax)
        # Third line is skipped and not echoed
        infile.readline()
        clusters = _merge_clusters(_initial_clusters(infile, n), n)
        for c in sorted(clusters, reverse=True, key=len):
            print(c, file=outfile)


if __name__ == '__main__':
    main()