annotate signif.py @ 6:a56d5285575b

drafted Vector.checkNew, still need found0 and foundN
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 10 Mar 2020 19:56:07 +0000
parents fee51ab07d09
children 4d9778ade7b2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 from nltk import FreqDist
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 from random import randint
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 import pylab
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 from math import sqrt
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 def mean(self):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 # Assumes the keys of this distribution are numbers!
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 return float(sum(v*self[v] for v in self.keys()))/self.N()
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 FreqDist.mean=mean
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12 def bell(self,maxVal=None,bars=False,**kwargs):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 # Assumes the keys of this distribution are numbers!
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 if maxVal is not None:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 sk = sorted([k for k in self.keys() if k<=maxVal]) # range(max(self.keys())+1)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 else:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 sk=sorted(self.keys())
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 print len(sk)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 #sk.append(sk[-1]+1)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 #sk[0:0]=[(sk[0]-1)]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 mm=0 # sk[0]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22 mean = self.mean()
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 tot = 0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24 ssd = 0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 for v in self.keys():
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26 d = v-mean
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 ssd+=d*d*self[v]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28 sd=sqrt(ssd/float(self.N()))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
29 #print (mean,sd)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 kv=[self[k] for k in sk]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31 pylab.figure().subplots_adjust(bottom=0.15)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
32 pylab.plot(sk,kv,color='blue')
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
33 if kwargs['xtra']:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34 xtra=kwargs['xtra']
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
35 pylab.plot(sk,[xtra[k] for k in sk],color='red')
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36 if bars:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
37 pylab.bar([s-mm for s in sk],kv,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
38 align='center',color='white',edgecolor='pink')
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39 pylab.xticks(sk,rotation=90)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
40 mv=self[self.max()]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
41 bb=(-mv/10,mv+(mv/10))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42 pylab.plot((mean-mm,mean-mm),bb,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
43 (mean-mm-sd,mean-mm-sd),bb,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
44 (mean-mm-(2*sd),mean-mm-(2*sd)),bb,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
45 (mean-mm+sd,mean-mm+sd),bb,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
46 (mean-mm+(2*sd),mean-mm+(2*sd)),bb,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
47 color='green')
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
48 pylab.xlabel("N %s, max %s\nmean %5.2f, s.d. %5.2f"%(self.N(),mv,mean, sd))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
49 pylab.show()
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
50
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
51 FreqDist.bell=bell
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
52
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
53 def ranks(l,**kvargs):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
54 # compute the rank of every element in a list
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
55 # uses sort, passing on all kv args
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
56 # uses key kv arg itself
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
57 # _Very_ inefficient, in several ways!
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
58 # Result is a pair:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
59 # list of ranks
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
60 # list of tie information, each elt the magnitude of a tie group
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
61 s=sorted(l,**kvargs)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
62 i=0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
63 res=[]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
64 td=[]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
65 if kvargs.has_key('key'):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
66 kf=kvargs['key']
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
67 else:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
68 kf=lambda x:x
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
69 while i<len(l):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
70 ties=[x for x in s if kf(s[i])==kf(x)]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
71 if len(ties)>1:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
72 td.append(len(ties))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
73 r=float(i+1+(i+len(ties)))/2.0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
74 for e in ties:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
75 res.append((r,e))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
76 i+=1
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
77 return (res,td)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
78
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
79 def mannWhitneyU(fd1,fd2,forceZ=False):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
80 # Compute Mann Whitney U test for two frequency distributions
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
81 # For n1 and n2 <= 20, see http://www.soc.univ.keiv.ua/LIB/PUB/T/textual.pdf
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
82 # to look up significance levels on the result: see Part 3 section 10,
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
83 # actual page 150 (printed page 144)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
84 # Or use http://faculty.vassar.edu/lowry/utest.html to do it for you
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
85 # For n1 and n2 > 20, U itself is normally distributed, we
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
86 # return a tuple with a z-test value
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
87 # HST DOES NOT BELIEVE THIS IS CORRECT -- DOES NOT APPEAR TO GIVE CORRECT ANSWERS!!
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
88 r1=[(lambda x:x.append(1) or x)(list(x)) for x in fd1.items()]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
89 r2=[(lambda x:x.append(2) or x)(list(x)) for x in fd2.items()]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
90 n1=len(r1)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
91 n2=len(r2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
92 (ar,ties)=ranks(r1+r2,key=lambda e:e[1])
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
93 s1=sum(r[0] for r in ar if r[1][2] is 1)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
94 s2=sum(r[0] for r in ar if r[1][2] is 2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
95 u1=float(n1*n2)+(float(n1*(n1+1))/2.0)-float(s1)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
96 u2=float(n1*n2)+(float(n2*(n2+1))/2.0)-float(s2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
97 u=min(u1,u2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
98 if forceZ or n1>20 or n2>20:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
99 # we can treat U as sample from a normal distribution, and compute
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
100 # a z-score
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
101 # See e.g. http://mlsc.lboro.ac.uk/resources/statistics/Mannwhitney.pdf
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
102 mu=float(n1*n2)/2.0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
103 if len(ties)>0:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
104 n=float(n1+n2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
105 ts=sum((float((t*t*t)-t)/12.0) for t in ties)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
106 su=sqrt((float(n1*n2)/(n*n-1))*((float((n*n*n)-n)/12.0)-ts))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
107 else:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
108 su=sqrt(float(n1*n2*(n1+n2+1))/12.0)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
109 z=(u-mu)/su
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
110 return (n1,n2,u,z)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
111 else:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
112 return (n1,n2,u)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
113
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
114 # This started from http://dr-adorio-adventures.blogspot.com/2010/05/draft-untested.html
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
115 # but has a number of bug fixes
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
116 def Rank(l,**kvargs):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
117 # compute the rank of every element in a list
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
118 # uses sort, passing on all kv args
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
119 # uses key kv arg itself
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
120 # _Very_ inefficient, in several ways!
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
121 # Result is a list of pairs ( r, v) where r is a rank and v is an input value
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
122 s=sorted(l,**kvargs)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
123 i=0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
124 res=[]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
125 if kvargs.has_key('key'):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
126 kf=kvargs['key']
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
127 else:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
128 kf=lambda x:x
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
129 while i<len(l):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
130 ties=[x for x in s if kf(s[i])==kf(x)]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
131 r=float(i+1+(i+len(ties)))/2.0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
132 #print (i,r,ties)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
133 for e in ties:
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
134 res.append((r,e))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
135 i+=1
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
136 return (res)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
137
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
138 def mannWhitney(S1, S2):
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
139 """
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
140 Returns the Mann-Whitney U statistic of two samples S1 and S2.
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
141 """
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
142 # Form a single array with a categorical variable indicate the sample
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
143 X = [(s, 0) for s in S1]
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
144 X.extend([(s,1) for s in S2])
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
145 R = Rank(X,key=lambda x:x[0])
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
146
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
147 # Compute needed parameters.
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
148 n1 = float(len(S1))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
149 n2 = float(len(S2))
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
150
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
151 # Compute total ranks for sample 1.
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
152 R1 = sum([i for i, (x,j) in R if j == 0])
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
153 R2 = sum([i for i, (x,j) in R if j == 1])
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
154 u1 = (n1*n2)+((n1*(n1+1))/2.0)-R1
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
155 u2 = n1 * n2 - u1
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
156 U = min(u1, u2)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
157 #print u1,R1/n1,R2/n2
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
158
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
159 mU = n1 * n2 / 2.0
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
160 sigmaU = sqrt((n1 * n2 * (n1 + n2 + 1))/12.0)
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
161 return u1, R1/n1,R2/n2, (U-mU)/sigmaU
fee51ab07d09 blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
162