Mercurial > hg > cc > cirrus_work
changeset 34:2e8002a64f72
compute and graph confidence intervals
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 23 Nov 2022 11:05:45 +0000 |
parents | 317bf47b506c |
children | f76656fa98f7 |
files | bin/spearman.py |
diffstat | 1 files changed, 31 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/spearman.py Tue Nov 22 19:13:25 2022 +0000 +++ b/bin/spearman.py Wed Nov 23 11:05:45 2022 +0000 @@ -23,7 +23,7 @@ import matplotlib.pyplot as plt import pylab -import sys +import sys, math def qqa(): # q-q plot for the whole crawl @@ -46,7 +46,9 @@ plt.show() def plot_x(sort=False,block=True): - # Make these two subplots... + # Make these two subplots, w. and w/o sorting + # See https://stackoverflow.com/questions/4700614/how-to-put-the-legend-outside-the-plot + # for legend hacking if sort: aso=np.argsort(-all) plot_all=all[aso] @@ -59,7 +61,7 @@ plt.plot(plot_x,'bx',label='Mean of rank correlation of each segment x all other segments') plt.plot([0,N-1],[xm,xm],'b',label='Mean of segment x segment means') plt.axis([0,N-1,0.8,1.0]) - plt.legend(loc='best') + plt.legend(loc='best',fontsize='small') plt.grid(True) plt.show(block=block) @@ -85,6 +87,32 @@ sdax.set_xticklabels([str(s) for s,v in sdd]) plt.show() +def ci(rho,n,conf=0.95): + # Courtesy of https://stats.stackexchange.com/a/18904 + # rho is (rank) correlation, n is sample size + stderr=1.0/math.sqrt(n-3) + z=stats.norm.ppf(1.0-((1.0-conf)/2)) + delta=z*stderr + lower=math.tanh(math.atanh(rho)-delta) + upper=math.tanh(math.atanh(rho)+delta) + return (lower,upper) + +def plot_ci(rhos,n,trim=None,conf=0.95): + # rhos are (rank) correlation values + rhos_s=rhos[(-rhos).argsort()] + if trim is None: + l=len(rhos) + else: + rhos_s=rhos_s[:trim] + l=trim + cc=(np.array([ci(r,n,conf) for r in rhos_s])).T + ue=cc[1]-rhos_s + le=rhos_s-cc[0] + #for i in range(len(rhos)): + #print(cc[i][0],rhos_s[i]-cc[i][0],rhos_s[i],cc[i][1],-rhos_s[i]+cc[i][1]) + plt.errorbar(np.arange(l),rhos_s,yerr=[le,ue],fmt='o') + plt.show() + def first_diff(ranks): # first disagreement with baseline == {1,2,...} for i in range(len(ranks)):