Mercurial > hg > lib > markup
diff python/cluster.py @ 4:56508a6033a9
minutor chunk hacking
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 25 May 2021 14:01:26 -0400 |
parents | |
children | 672621ab4db4 |
line wrap: on
line diff
#!/usr/bin/python3
"""Read a minutor block location tsv and resort it to show clusters.

The input file is expected to start with two header lines (copied verbatim
to the output), followed by one line that is skipped, followed by data
lines whose third tab-separated field is a comma-separated coordinate
list.  Points are grouped so that two points share a cluster whenever they
are linked by a chain of points each within ``n`` of the next, and the
clusters are written to the output file, largest first.
"""
import os
import sys

from util import *

USAGE = """Usage: cluster.py [-h] [-c n] infile.tsv [outfile.tsv]
  n is cluster diameter, default is 5
  default outfile is [infile]_c[n].tsv"""


def _usage_exit():
    """Print the usage message and terminate with exit status 1."""
    print(USAGE)
    sys.exit(1)


def _parse_args(argv):
    """Return (limit, infile_name, outfile_name) parsed from *argv*.

    Exits with the usage message when no arguments are given, when -h is
    the first argument, or when a required value is missing or not a number.
    """
    args = list(argv[1:])
    if not args or args[0] == '-h':
        _usage_exit()
    limit = 5.0
    if args[0] == '-c':
        if len(args) < 2:
            _usage_exit()
        try:
            limit = float(args[1])
        except ValueError:
            _usage_exit()
        args = args[2:]
    if not args:
        _usage_exit()
    infile_name = args[0]
    if len(args) > 1:
        outfile_name = args[1]
    else:
        # Cut at the first dot of the file name only, so a dot in the
        # directory part (e.g. "./data.tsv") cannot truncate the path.
        folder, base = os.path.split(infile_name)
        stem = os.path.join(folder, base.split('.')[0])
        outfile_name = "%s_c%s.tsv" % (stem, limit)
    return limit, infile_name, outfile_name


def _touches(cluster_a, cluster_b, limit):
    """Return True if any point of *cluster_a* is within *limit* of any
    point of *cluster_b*, as measured by util's d()."""
    return any(d(p, q) <= limit for p in cluster_a for q in cluster_b)


def _absorb_pass(clusters, limit):
    """Fold each cluster into the first already-kept cluster it touches.

    Returns (kept, merges).  Every input cluster ends up in exactly one
    kept cluster, so no point is ever duplicated or dropped.
    """
    kept = []
    merges = 0
    for cluster in clusters:
        for target in kept:
            if _touches(target, cluster, limit):
                target.extend(cluster)
                merges += 1
                break
        else:
            kept.append(list(cluster))
    return kept, merges


def _cluster(points, limit):
    """Group *points* into clusters of mutually reachable points.

    Starts from one cluster per point and repeats _absorb_pass until a
    pass performs no merge; at that stage no two remaining clusters touch.
    Each pass reports (cluster count, merges in that pass) on stderr.
    """
    clusters = [[p] for p in points]
    while True:
        clusters, merges = _absorb_pass(clusters, limit)
        print(len(clusters), merges, file=sys.stderr)
        if not merges:
            return clusters


def main():
    """Run the command-line tool."""
    n, infile_name, outfile_name = _parse_args(sys.argv)

    with open(infile_name, 'r') as infile, open(outfile_name, 'w') as outfile:
        # First header line: copied through, and split with util's PPAT.
        # NOTE(review): presumably a count plus an x,y,z origin, with a
        # type name in field 9 -- confirm against util and the tsv format.
        l = infile.readline().rstrip()
        print(l, file=outfile)
        ff = PPAT.split(l)
        (nr, ox, oy, oz) = intsMaybe(ff)
        et = ff[9]

        # Second header line: copied through as well.
        # NOTE(review): presumably a radius and a y range -- confirm.
        l = infile.readline().rstrip()
        print(l, file=outfile)
        (orad, ymin, ymax) = intsMaybe(PPAT.split(l))
        print(nr, ox, oy, oz, et, orad, ymin, ymax)

        # The third line is read and discarded.
        infile.readline()

        points = []
        for l in infile:
            if not l.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            points.append([float(i) for i in l.split('\t')[2].split(',')])

        # Largest clusters first; ties keep their discovery order.
        for c in sorted(_cluster(points, n), key=len, reverse=True):
            print(c, file=outfile)


if __name__ == '__main__':
    main()