Mercurial > hg > python
changeset 36:30f8af85c3fd
merged
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 29 Jul 2021 12:55:28 +0100 |
parents | 447b9346453b (current diff) a5a353728540 (diff) |
children | a0b702a76872 |
files | |
diffstat | 4 files changed, 111 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distr_1.py Thu Jul 29 12:55:28 2021 +0100 @@ -0,0 +1,27 @@ +#!/usr/bin/python3 +'''Display a bell curve for a fake normal distribution''' +# experimenting with normal distributions of coin flips +# Usage: distr.py Nexprs Mflips [differentN] +# Requires python3.6 as of 2021-07-28 +import sys, random, signif +from nltk import FreqDist + +n=int(sys.argv[1]) +m=int(sys.argv[2]) +if len(sys.argv)>3: + nn=int(sys.argv[3]) +else: + nn=None +fd=FreqDist() +for i in range(n): + fd[sum(random.randint(0,1) for i in range(m))]+=1 + +if nn is not None: + fd2=FreqDist() + for i in range(nn): + fd2[sum(random.randint(0,1) for i in range(m))]+=1 + + fd.bell(xtra=fd2) +else: + fd.bell() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distr_2.py Thu Jul 29 12:55:28 2021 +0100 @@ -0,0 +1,15 @@ +# experimenting with distributions +# Usage: distr.py N bucketWidth max +import sys, random, signif +from nltk import FreqDist + +mu=float(sys.argv[1]) +sigma=float(sys.argv[2]) +n=int(sys.argv[3]) +scale=float(sys.argv[4]) +fd=FreqDist() +fd2=FreqDist() +for i in xrange(n): + fd[round(random.normalvariate(mu,sigma)/scale)]+=1 + fd2[round(random.gauss(mu,sigma)/scale)]+=1 +fd.bell(xtra=fd2)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distr_3.py Thu Jul 29 12:55:28 2021 +0100 @@ -0,0 +1,44 @@ +#!/usr/bin/python3 +'''Simulate stats for showing segments are the same as the whole''' +# Simulate normal distributions of coin flips with slightly +# unfair coins +# Usage: distr_3.py segsize flipcount nsegs max-bias +# Requires python3.6 as of 2021-07-28 +# Runs nsegs*segsize experiments, each involving flipcount flips. +# Experiments in a given segment have a bias around 0.5, +# randomly chosen between 0 and max-bias (which must be < 0.5 + +import sys, random, signif +from nltk import FreqDist + +n=int(sys.argv[1]) +m=int(sys.argv[2]) +ss=int(sys.argv[3]) +b=float(sys.argv[4]) + +whole=FreqDist() + +def fd(n,m,bias): + global whole + f=FreqDist() + split=random.uniform(0.5-bias,0.5+bias) + for i in range(n): + res=sum(int(random.uniform(0,1)>split) for j in range(m)) + f[res]+=1 + whole[res]+=1 + return f + +segs=[fd(n,m,b) for s in range(ss)] + +print(whole[1],whole.mean(), whole.sd(),sep='\t') +print() + +for s in segs: + print(s[1],s.mean(), s.sd(),sep='\t') + +exit() + +whole.bell(block=False) +segs[4].bell() + +
--- a/signif.py Thu Jul 29 12:54:26 2021 +0100 +++ b/signif.py Thu Jul 29 12:55:28 2021 +0100 @@ -4,33 +4,44 @@ from math import sqrt def mean(self): - # Assumes the keys of this distribution are numbers! - return float(sum(v*self[v] for v in self.keys()))/self.N() + try: + return self.mean_value + except AttributeError: + # Assumes the keys of this distribution are numbers! + self.mean_value=float(sum(v*self[v] for v in self.keys()))/self.N() + return self.mean_value + +def sd(self): + try: + return self.sd_value + except AttributeError: + ssd = 0 + for v in self.keys(): + d = v-self.mean() + ssd+=d*d*self[v] + self.sd_value=sqrt(ssd/float(self.N())) + return self.sd_value FreqDist.mean=mean +FreqDist.sd=sd -def bell(self,maxVal=None,bars=False,**kwargs): +def bell(self,maxVal=None,bars=False,block=True,**kwargs): # Assumes the keys of this distribution are numbers! if maxVal is not None: sk = sorted([k for k in self.keys() if k<=maxVal]) # range(max(self.keys())+1) else: sk=sorted(self.keys()) - print len(sk) + print(len(sk)) #sk.append(sk[-1]+1) #sk[0:0]=[(sk[0]-1)] mm=0 # sk[0] mean = self.mean() - tot = 0 - ssd = 0 - for v in self.keys(): - d = v-mean - ssd+=d*d*self[v] - sd=sqrt(ssd/float(self.N())) + sd = self.sd() #print (mean,sd) kv=[self[k] for k in sk] pylab.figure().subplots_adjust(bottom=0.15) pylab.plot(sk,kv,color='blue') - if kwargs['xtra']: + if 'xtra' in kwargs and kwargs['xtra']: xtra=kwargs['xtra'] pylab.plot(sk,[xtra[k] for k in sk],color='red') if bars: @@ -46,7 +57,7 @@ (mean-mm+(2*sd),mean-mm+(2*sd)),bb, color='green') pylab.xlabel("N %s, max %s\nmean %5.2f, s.d. %5.2f"%(self.N(),mv,mean, sd)) - pylab.show() + pylab.show(block=block) FreqDist.bell=bell @@ -90,8 +101,8 @@ n1=len(r1) n2=len(r2) (ar,ties)=ranks(r1+r2,key=lambda e:e[1]) - s1=sum(r[0] for r in ar if r[1][2] is 1) - s2=sum(r[0] for r in ar if r[1][2] is 2) + s1=sum(r[0] for r in ar if r[1][2] == 1) + s2=sum(r[0] for r in ar if r[1][2] == 2) u1=float(n1*n2)+(float(n1*(n1+1))/2.0)-float(s1) u2=float(n1*n2)+(float(n2*(n2+1))/2.0)-float(s2) u=min(u1,u2)