Mercurial > hg > cc > cirrus_work
changeset 29:669a0b120d34
start work on ranking,
lose faith in getting row vs. column correct every time
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 16 Nov 2022 19:52:50 +0000 |
parents | 7ffb686ca060 |
children | c73ec9deabbe |
files | bin/spearman.py |
diffstat | 1 files changed, 25 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/spearman.py Wed Nov 16 17:29:55 2022 +0000
+++ b/bin/spearman.py Wed Nov 16 19:52:50 2022 +0000
@@ -67,17 +67,37 @@
   sdax.set_xticklabels([str(s) for s,v in sdd])
   plt.show()
 
+def first_diff(ranks):
+  # first disagreement with baseline == {1,2,...}
+  for i in range(len(ranks)):
+    if ranks[i]!=i+1.0:
+      return i
+  return i+1
+
+def ranks():
+  # Combine segment measures:
+  #  segID,rank corr. wrt all,inverse variance, mean cross rank corr.,first disagreement
+  return np.array([i,all[i],1.0/xd[i].variance,xd[i].mean,first_diff(ranks[i])])
+
 counts=loadtxt(sys.argv[1]+".csv",delimiter=',')
-o=stats.spearmanr(counts,nan_policy='omit')
+# "If axis=0 (default), then each column represents a variable, with
+# observations in the rows"
+ranks=[stats.rankdata(-counts[i],method='average') for for i in range(1,100)]
+corr=stats.spearmanr(counts,nan_policy='omit').correlation
 
-all=o.correlation[0][1:]
+all=corr[0][1:]
 all_s=stats.describe(all)
 all_m=all_s.mean
-# Should get the confidence interval for this, so we can
-# use it in plot_x
 
-x=np.array([np.concatenate((o.correlation[i][1:i],o.correlation[i][i+1:])) for i in range(1,101)])
+x=np.array([np.concatenate((corr[i][1:i],
+                            corr[i][i+1:])) for i in range(1,101)])
 xd=[stats.describe(x[i]) for i in range(100)]
 xs=stats.describe(np.array([xd[i].mean for i in range(100)]))
 xm=xs.mean
 xsd=np.sqrt(xs.variance)
+
+### I need to review rows, e.g. counts[0] is an array of 101 counts
+###   for the most common label in the complete crawl,
+###   from the complete crawl and all the segments
+### versus columns, e.g. counts[:,0] is an array of 100 decreasing counts
+###   for all the labels in the complete crawl