# HG changeset patch # User Henry S. Thompson # Date 1669201545 0 # Node ID 2e8002a64f729c41cb7495873ad99d6ebb1af6b6 # Parent 317bf47b506cda5602fa630e4bd073d4c58947a8 compute and graph confidence intervals diff -r 317bf47b506c -r 2e8002a64f72 bin/spearman.py --- a/bin/spearman.py Tue Nov 22 19:13:25 2022 +0000 +++ b/bin/spearman.py Wed Nov 23 11:05:45 2022 +0000 @@ -23,7 +23,7 @@ import matplotlib.pyplot as plt import pylab -import sys +import sys, math def qqa(): # q-q plot for the whole crawl @@ -46,7 +46,9 @@ plt.show() def plot_x(sort=False,block=True): - # Make these two subplots... + # Make these two subplots, w. and w/o sorting + # See https://stackoverflow.com/questions/4700614/how-to-put-the-legend-outside-the-plot + # for legend hacking if sort: aso=np.argsort(-all) plot_all=all[aso] @@ -59,7 +61,7 @@ plt.plot(plot_x,'bx',label='Mean of rank correlation of each segment x all other segments') plt.plot([0,N-1],[xm,xm],'b',label='Mean of segment x segment means') plt.axis([0,N-1,0.8,1.0]) - plt.legend(loc='best') + plt.legend(loc='best',fontsize='small') plt.grid(True) plt.show(block=block) @@ -85,6 +87,32 @@ sdax.set_xticklabels([str(s) for s,v in sdd]) plt.show() +def ci(rho,n,conf=0.95): + # Courtesy of https://stats.stackexchange.com/a/18904 + # rho is (rank) correlation, n is sample size + stderr=1.0/math.sqrt(n-3) + z=stats.norm.ppf(1.0-((1.0-conf)/2)) + delta=z*stderr + lower=math.tanh(math.atanh(rho)-delta) + upper=math.tanh(math.atanh(rho)+delta) + return (lower,upper) + +def plot_ci(rhos,n,trim=None,conf=0.95): + # rhos are (rank) correlation values + rhos_s=rhos[(-rhos).argsort()] + if trim is None: + l=len(rhos) + else: + rhos_s=rhos_s[:trim] + l=trim + cc=(np.array([ci(r,n,conf) for r in rhos_s])).T + ue=cc[1]-rhos_s + le=rhos_s-cc[0] + #for i in range(len(rhos)): + #print(cc[i][0],rhos_s[i]-cc[i][0],rhos_s[i],cc[i][1],-rhos_s[i]+cc[i][1]) + plt.errorbar(np.arange(l),rhos_s,yerr=[le,ue],fmt='o') + plt.show() + def first_diff(ranks): # first disagreement with baseline == {1,2,...} for i in range(len(ranks)):