Mercurial > hg > python
annotate signif.py @ 49:e67a5ecd6198
add [-u] to usage (?)
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 31 Jul 2022 19:07:01 +0100 |
parents | 4d9778ade7b2 |
children |
rev | line source |
---|---|
0
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 from nltk import FreqDist |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 from random import randint |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 import pylab |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 from math import sqrt |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def mean(self):
    """Return the count-weighted mean of this distribution's keys.

    Written to be attached to FreqDist as a method.  Assumes the keys
    of this distribution are numbers!

    The result is stored on the instance as ``mean_value`` the first
    time it is computed; every later call returns that stored value
    without looking at the counts again.
    """
    if not hasattr(self, 'mean_value'):
        # Each key contributes key * count; dividing by N() gives the mean
        weighted_total = sum(value*self[value] for value in self.keys())
        self.mean_value = float(weighted_total)/self.N()
    return self.mean_value
4d9778ade7b2
python3, add sd and mean caching
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
0
diff
changeset
|
13 |
4d9778ade7b2
python3, add sd and mean caching
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
0
diff
changeset
|
def sd(self):
    """Return the standard deviation of this distribution's keys.

    Written to be attached to FreqDist as a method; relies on the
    instance also providing mean() and N().  The sum of squared
    deviations, weighted by count, is divided by N() (not N()-1)
    before the square root is taken.

    The result is stored on the instance as ``sd_value`` the first
    time it is computed; every later call returns that stored value.
    """
    if hasattr(self, 'sd_value'):
        return self.sd_value
    squared_total = 0
    for value in self.keys():
        delta = value-self.mean()
        squared_total += delta*delta*self[value]
    self.sd_value = sqrt(squared_total/float(self.N()))
    return self.sd_value
0
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
24 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Attach mean() and sd() to nltk's FreqDist so that every frequency
# distribution can call them as methods
FreqDist.mean=mean
FreqDist.sd=sd
0
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
27 |
32
4d9778ade7b2
python3, add sd and mean caching
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
0
diff
changeset
|
def bell(self,maxVal=None,bars=False,block=True,**kwargs):
    """Plot this distribution with markers at its mean and +/- 1 and 2 s.d.

    Written to be attached to FreqDist as a method; relies on the
    instance providing mean(), sd(), N() and max().

    maxVal -- if not None, only keys <= maxVal are plotted (mean and
              s.d. are still taken from the whole distribution)
    bars   -- if true, also draw the counts as a bar chart
    block  -- passed through to pylab.show
    xtra   -- (keyword only) if given and true, something indexable by
              the plotted keys; its values are drawn as a second, red line

    Prints the number of plotted keys, then opens a new pylab figure.
    Returns None.
    """
    # Assumes the keys of this distribution are numbers!
    if maxVal is not None:
        sk = sorted([k for k in self.keys() if k<=maxVal]) # range(max(self.keys())+1)
    else:
        sk=sorted(self.keys())
    print(len(sk))
    #sk.append(sk[-1]+1)
    #sk[0:0]=[(sk[0]-1)]
    mm=0 # sk[0]
    mean = self.mean()
    sd = self.sd()
    #print (mean,sd)
    kv=[self[k] for k in sk]
    # Leave extra room under the axes for the tick labels and two-line x label
    pylab.figure().subplots_adjust(bottom=0.15)
    pylab.plot(sk,kv,color='blue')
    if 'xtra' in kwargs and kwargs['xtra']:
        xtra=kwargs['xtra']
        pylab.plot(sk,[xtra[k] for k in sk],color='red')
    if bars:
        pylab.bar([s-mm for s in sk],kv,
                  align='center',color='white',edgecolor='pink')
    # NOTE(review): indentation was lost in the copy this was recovered
    # from; xticks is assumed to apply whether or not bars is set -- confirm
    pylab.xticks(sk,rotation=90)
    # mv is the count stored under the key that self.max() returns
    mv=self[self.max()]
    # Vertical extent of the marker lines: 10% of mv beyond either end
    bb=(-mv/10,mv+(mv/10))
    # Green vertical lines at the mean, mean -/+ 1 s.d. and mean -/+ 2 s.d.
    pylab.plot((mean-mm,mean-mm),bb,
               (mean-mm-sd,mean-mm-sd),bb,
               (mean-mm-(2*sd),mean-mm-(2*sd)),bb,
               (mean-mm+sd,mean-mm+sd),bb,
               (mean-mm+(2*sd),mean-mm+(2*sd)),bb,
               color='green')
    pylab.xlabel("N %s, max %s\nmean %5.2f, s.d. %5.2f"%(self.N(),mv,mean, sd))
    pylab.show(block=block)
0
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
61 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
# Attach bell() to nltk's FreqDist so it can be called as a method
FreqDist.bell=bell
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
63 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def ranks(l,**kvargs):
    """Compute the rank of every element of l, averaging ranks over ties.

    l       -- any iterable of mutually sortable items
    kvargs  -- passed on unchanged to sorted(); the 'key' argument, if
               present and not None, is also used to decide which
               elements are tied

    Result is a pair:
      list of (rank, element) pairs, in sorted order; ranks start at
        1.0 and every member of a tie group gets the mean of the
        positions the group occupies
      list of tie information, each elt the magnitude of a tie group
        (groups of size 1 are not listed)
    """
    s=sorted(l,**kvargs)
    # dict.has_key() does not exist in Python 3; .get() also copes with
    # an explicit key=None, which sorted() accepts
    kf=kvargs.get('key')
    if kf is None:
        kf=lambda x:x
    # Evaluate each key once, then walk the sorted list a group at a time
    keys=[kf(x) for x in s]
    n=len(s)
    res=[]
    td=[]
    i=0
    while i<n:
        # [i, j) is the run of elements sharing the key of s[i]; it
        # always includes s[i] itself, so the loop is certain to advance
        j=i+1
        while j<n and keys[j]==keys[i]:
            j+=1
        size=j-i
        if size>1:
            td.append(size)
        # Mean of the 1-based positions i+1 .. j
        r=float(i+1+(i+size))/2.0
        res.extend((r,e) for e in s[i:j])
        i=j
    return (res,td)
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
89 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def mannWhitneyU(fd1,fd2,forceZ=False):
    """Compute a Mann Whitney U test for two frequency distributions.

    Every (sample, count) item of fd1 and of fd2 is treated as one
    observation, and observations are ranked by their count (the sample
    itself is not compared).  n1 and n2 are therefore the numbers of
    items in fd1 and fd2.

    fd1, fd2 -- objects with an items() method yielding (sample, count)
    forceZ   -- if true, always compute the z-score

    Returns (n1, n2, u) with u = min(u1, u2).  If forceZ is true, or
    either of n1 and n2 exceeds 20, returns (n1, n2, u, z) instead.

    Raises ZeroDivisionError if a z-score is requested and the standard
    deviation comes out as zero (e.g. an empty distribution, or every
    count tied).
    """
    # For n1 and n2 <= 20, see http://www.soc.univ.keiv.ua/LIB/PUB/T/textual.pdf
    #  to look up significance levels on the result: see Part 3 section 10,
    #  actual page 150 (printed page 144)
    # Or use http://faculty.vassar.edu/lowry/utest.html to do it for you
    # For n1 and n2 > 20, U itself is normally distributed, we
    #  return a tuple with a z-test value
    # HST DOES NOT BELIEVE THIS IS CORRECT -- DOES NOT APPEAR TO GIVE CORRECT ANSWERS!!
    # NOTE(review): the tie-corrected s.d. below used to divide by n*n-1
    #  rather than n*(n-1); whether that explains the doubt above is
    #  unconfirmed

    # Tag each [sample, count] item with the distribution it came from
    r1=[[sample,count,1] for (sample,count) in fd1.items()]
    r2=[[sample,count,2] for (sample,count) in fd2.items()]
    n1=len(r1)
    n2=len(r2)
    (ar,ties)=ranks(r1+r2,key=lambda e:e[1])
    # Rank sums per distribution; each elt of ar is (rank, [sample, count, tag])
    s1=sum(r[0] for r in ar if r[1][2] == 1)
    s2=sum(r[0] for r in ar if r[1][2] == 2)
    u1=float(n1*n2)+(float(n1*(n1+1))/2.0)-float(s1)
    u2=float(n1*n2)+(float(n2*(n2+1))/2.0)-float(s2)
    u=min(u1,u2)
    if forceZ or n1>20 or n2>20:
        # we can treat U as sample from a normal distribution, and compute
        # a z-score
        # See e.g. http://mlsc.lboro.ac.uk/resources/statistics/Mannwhitney.pdf
        mu=float(n1*n2)/2.0
        if len(ties)>0:
            n=float(n1+n2)
            ts=sum((float((t*t*t)-t)/12.0) for t in ties)
            # The divisor must be n*(n-1): with no ties this then reduces
            # to the n1*n2*(n1+n2+1)/12 of the other branch
            su=sqrt((float(n1*n2)/(n*(n-1)))*((float((n*n*n)-n)/12.0)-ts))
        else:
            su=sqrt(float(n1*n2*(n1+n2+1))/12.0)
        z=(u-mu)/su
        return (n1,n2,u,z)
    else:
        return (n1,n2,u)
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
124 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
125 # This started from http://dr-adorio-adventures.blogspot.com/2010/05/draft-untested.html |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
126 # but has a number of bug fixes |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def Rank(l,**kvargs):
    """Compute the rank of every element of l, averaging ranks over ties.

    l       -- any iterable of mutually sortable items
    kvargs  -- passed on unchanged to sorted(); the 'key' argument, if
               present and not None, is also used to decide which
               elements are tied

    Result is a list of pairs (r, v) where r is a rank and v is an
    input value, in sorted order.  Ranks start at 1.0 and every member
    of a tie group gets the mean of the positions the group occupies.
    """
    s=sorted(l,**kvargs)
    # dict.has_key() does not exist in Python 3; .get() also copes with
    # an explicit key=None, which sorted() accepts
    kf=kvargs.get('key')
    if kf is None:
        kf=lambda x:x
    # Evaluate each key once, then walk the sorted list a group at a time
    keys=[kf(x) for x in s]
    n=len(s)
    res=[]
    i=0
    while i<n:
        # [i, j) is the run of elements sharing the key of s[i]; it
        # always includes s[i] itself, so the loop is certain to advance
        j=i+1
        while j<n and keys[j]==keys[i]:
            j+=1
        # Mean of the 1-based positions i+1 .. j
        r=float(i+1+j)/2.0
        res.extend((r,e) for e in s[i:j])
        i=j
    return res
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
148 |
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
def mannWhitney(S1, S2):
    """
    Rank the pooled samples S1 and S2 and summarise them for a
    Mann-Whitney test.

    Returns a 4-tuple:
      u1, the U value computed from the rank sum of S1
      the mean rank of the S1 observations
      the mean rank of the S2 observations
      a z-score for min(u1, u2), using the standard deviation formula
        without any correction for ties
    """
    # Pool the observations, tagging each with the sample it came from
    pooled = [(s, 0) for s in S1] + [(s, 1) for s in S2]
    ranked = Rank(pooled, key=lambda pair: pair[0])

    # Sample sizes as floats, so all later arithmetic is floating point
    size1 = float(len(S1))
    size2 = float(len(S2))

    # Each elt of ranked is (rank, (observation, tag))
    rank_sum1 = sum(rank for rank, (_, tag) in ranked if tag == 0)
    rank_sum2 = sum(rank for rank, (_, tag) in ranked if tag == 1)

    product = size1 * size2
    u1 = product + ((size1 * (size1 + 1)) / 2.0) - rank_sum1
    u2 = product - u1
    smaller_u = min(u1, u2)

    expected_u = product / 2.0
    spread_u = sqrt((product * (size1 + size2 + 1)) / 12.0)
    return u1, rank_sum1 / size1, rank_sum2 / size2, (smaller_u - expected_u) / spread_u
fee51ab07d09
blanket publication of all existing python files in lib/python on maritain
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
173 |