changeset 36:30f8af85c3fd

merged
author Henry Thompson <ht@markup.co.uk>
date Thu, 29 Jul 2021 12:55:28 +0100
parents 447b9346453b (current diff) a5a353728540 (diff)
children a0b702a76872
files
diffstat 4 files changed, 111 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/distr_1.py	Thu Jul 29 12:55:28 2021 +0100
@@ -0,0 +1,27 @@
+#!/usr/bin/python3
+'''Display a bell curve for a fake normal distribution'''
+# experimenting with normal distributions of coin flips
+# Usage: distr.py Nexprs Mflips [differentN]
+# Requires python3.6 as of 2021-07-28
+import sys, random, signif
+from nltk import FreqDist
+
+n=int(sys.argv[1])
+m=int(sys.argv[2])
+if len(sys.argv)>3:
+  nn=int(sys.argv[3])
+else:
+  nn=None
+fd=FreqDist()
+for i in range(n):
+    fd[sum(random.randint(0,1) for i in range(m))]+=1
+
+if nn is not None:
+  fd2=FreqDist()
+  for i in range(nn):
+      fd2[sum(random.randint(0,1) for i in range(m))]+=1
+
+  fd.bell(xtra=fd2)
+else:
+  fd.bell()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/distr_2.py	Thu Jul 29 12:55:28 2021 +0100
@@ -0,0 +1,15 @@
+# experimenting with distributions
+# Usage: distr.py N bucketWidth max
+import sys, random, signif
+from nltk import FreqDist
+
+mu=float(sys.argv[1])
+sigma=float(sys.argv[2])
+n=int(sys.argv[3])
+scale=float(sys.argv[4])
+fd=FreqDist()
+fd2=FreqDist()
+for i in xrange(n):
+  fd[round(random.normalvariate(mu,sigma)/scale)]+=1
+  fd2[round(random.gauss(mu,sigma)/scale)]+=1
+fd.bell(xtra=fd2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/distr_3.py	Thu Jul 29 12:55:28 2021 +0100
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+'''Simulate stats for showing segments are the same as the whole'''
+# Simulate normal distributions of coin flips with slightly
+#  unfair coins
+# Usage: distr_3.py segsize flipcount nsegs max-bias
+# Requires python3.6 as of 2021-07-28
+# Runs nsegs*segsize experiments, each involving flipcount flips.
+#  Experiments in a given segment have a bias around 0.5,
+#   randomly chosen between 0 and max-bias (which must be < 0.5
+
+import sys, random, signif
+from nltk import FreqDist
+
+n=int(sys.argv[1])
+m=int(sys.argv[2])
+ss=int(sys.argv[3])
+b=float(sys.argv[4])
+
+whole=FreqDist()
+
+def fd(n,m,bias):
+  global whole
+  f=FreqDist()
+  split=random.uniform(0.5-bias,0.5+bias)
+  for i in range(n):
+    res=sum(int(random.uniform(0,1)>split) for j in range(m))
+    f[res]+=1
+    whole[res]+=1
+  return f
+
+segs=[fd(n,m,b) for s in range(ss)]
+
+print(whole[1],whole.mean(), whole.sd(),sep='\t')
+print()
+
+for s in segs:
+  print(s[1],s.mean(), s.sd(),sep='\t')
+
+exit()
+
+whole.bell(block=False)
+segs[4].bell()
+
+
--- a/signif.py	Thu Jul 29 12:54:26 2021 +0100
+++ b/signif.py	Thu Jul 29 12:55:28 2021 +0100
@@ -4,33 +4,44 @@
 from math import sqrt
 
 def mean(self):
-  # Assumes the keys of this distribution are numbers!
-  return float(sum(v*self[v] for v in self.keys()))/self.N()
+  try:
+    return self.mean_value
+  except AttributeError:
+    # Assumes the keys of this distribution are numbers!
+    self.mean_value=float(sum(v*self[v] for v in self.keys()))/self.N()
+    return self.mean_value
+
+def sd(self):
+  try:
+    return self.sd_value
+  except AttributeError:
+    ssd = 0
+    for v in self.keys():
+      d = v-self.mean()
+      ssd+=d*d*self[v]
+    self.sd_value=sqrt(ssd/float(self.N()))
+    return self.sd_value
 
 FreqDist.mean=mean
+FreqDist.sd=sd
 
-def bell(self,maxVal=None,bars=False,**kwargs):
+def bell(self,maxVal=None,bars=False,block=True,**kwargs):
   # Assumes the keys of this distribution are numbers!
   if maxVal is not None:
     sk = sorted([k for k in self.keys() if k<=maxVal]) # range(max(self.keys())+1)
   else:
     sk=sorted(self.keys())
-  print len(sk)
+  print(len(sk))
   #sk.append(sk[-1]+1)
   #sk[0:0]=[(sk[0]-1)]
   mm=0 # sk[0]
   mean = self.mean()
-  tot = 0
-  ssd = 0
-  for v in self.keys():
-    d = v-mean
-    ssd+=d*d*self[v]
-  sd=sqrt(ssd/float(self.N()))
+  sd = self.sd()
   #print (mean,sd)
   kv=[self[k] for k in sk]
   pylab.figure().subplots_adjust(bottom=0.15)
   pylab.plot(sk,kv,color='blue')
-  if kwargs['xtra']:
+  if 'xtra' in kwargs and kwargs['xtra']:
     xtra=kwargs['xtra']
     pylab.plot(sk,[xtra[k] for k in sk],color='red')
   if bars:
@@ -46,7 +57,7 @@
              (mean-mm+(2*sd),mean-mm+(2*sd)),bb,
              color='green')
   pylab.xlabel("N %s, max %s\nmean %5.2f, s.d. %5.2f"%(self.N(),mv,mean, sd))
-  pylab.show()
+  pylab.show(block=block)
 
 FreqDist.bell=bell
 
@@ -90,8 +101,8 @@
   n1=len(r1)
   n2=len(r2)
   (ar,ties)=ranks(r1+r2,key=lambda e:e[1])
-  s1=sum(r[0] for r in ar if r[1][2] is 1)
-  s2=sum(r[0] for r in ar if r[1][2] is 2)
+  s1=sum(r[0] for r in ar if r[1][2] == 1)
+  s2=sum(r[0] for r in ar if r[1][2] == 2)
   u1=float(n1*n2)+(float(n1*(n1+1))/2.0)-float(s1)
   u2=float(n1*n2)+(float(n2*(n2+1))/2.0)-float(s2)
   u=min(u1,u2)