Mercurial > hg > python
diff cluster.py @ 23:1670a33e3e6d
from markup
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Sat, 29 May 2021 11:07:34 +0100 |
parents | |
children | 668d788cac47 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cluster.py Sat May 29 11:07:34 2021 +0100 @@ -0,0 +1,97 @@ +#!/usr/bin/python3 +'''Read a minutor block lockation tsv and resort it to show clusters''' +import sys + +from util import * + +if len(sys.argv)==1 or sys.argv[1]=='-h': + print("""Usage: cluster.py [-h] [-c n] [-s] infile.tsv [outfile.tsv] + n is cluster diameter, default is 5 + -s for strict circle distance, reduces number of candidates + default outfile is [infile]_c[n].tsv""") + exit(1) +if sys.argv[1]=='-c': + sys.argv.pop(1) + n=float(sys.argv.pop(1)) +else: + n=5.0 + +if sys.argv[1]=='-s': + sys.argv.pop(1) + strict=True +else: + strict=False + +infile_name=sys.argv.pop(1) +if len(sys.argv)>1: + outfile_name=sys.argv.pop(1) +else: + outfile_name="%s_c%s.tsv"%(infile_name.split('.')[0],n) + +cc=[] + +with open(infile_name,'r') as infile: + with open(outfile_name,'w') as outfile: + l=infile.readline().rstrip() + print(l,file=outfile) + ff=PPAT.split(l) + (nr,ox,oy,oz)=intsMaybe(ff) + home=(float(ox),float(oy),float(oz)) + et=ff[9] + l=infile.readline().rstrip() + print(l,file=outfile) + (orad,ymin,ymax)=intsMaybe(PPAT.split(l)) + _=infile.readline() + for l in infile: + found=False + (_,td,qd)=l.rstrip().split('\t') + q=[float(i) for i in qd.split(',')] + td=float(td) + if strict and td>orad: + # Enforce a circular region, not square + print(td,orad,file=sys.stderr) + break + for c in cc: + for p in c: + if d(p,q)<=n: + c.append(q) + found=True + break + if found: + break + if not found: + cc.append([q]) + oc=cc + cc=[] # lose + w=0 + ow=-1 + nc=[] # win + while True: + for i,c in enumerate(oc): + win=False + for p in c: + for g in oc[i+1:]: + for q in g: + if d(p,q)<=n: + win=True + w+=1 + nc.append(c+g) + break + if win: + break + if win: + break + if not win: + cc.append(c) + print(len(cc),len(nc),ow,w,file=sys.stderr) + if ow==w: + break + ow=w + oc=nc + nc=[] + for c in sorted(cc,reverse=True,key=lambda x:len(x)): + print(c,file=outfile) + + + +