changeset 5:bd1db1ed4c25

found on ecclerig
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 09 Mar 2020 17:39:38 +0000
parents 2d7c91f89f6b
children a56d5285575b b1ec44d254c6 1670a33e3e6d
files lazyBug.py nhist.py
diffstat 2 files changed, 76 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lazyBug.py	Mon Mar 09 17:39:38 2020 +0000
@@ -0,0 +1,15 @@
+'''Illustrate problem with writable cache in LazySubsequence'''
+
+from nltk.corpus import brown
+import nltk, sys
+
+data = brown.tagged_sents(categories='news', tagset='universal')
+train_data=data[:1000]  # slice of the corpus view; presumably a LazySubsequence (per the docstring) -- TODO confirm
+y=train_data[0]  # keep a reference to the first sentence as first fetched
+print('initial:',y==train_data[0],train_data[0][:2])
+train_data[0].insert(0,('<s>','<s>'))  # mutate in place whatever object indexing returns
+print('modified:',y==train_data[0],train_data[0][:2])  # shows whether the insert is visible on re-fetch
+z=len([s for s in train_data])  # iterate over the whole slice; z itself is not used afterwards
+print('post-view:',y==train_data[0],train_data[0][:2])  # same check after the full iteration; presumably differs from 'modified' -- verify
+
+print("\nnltk: %s\npython: %s"%(nltk.version_info,sys.version))  # report versions for the bug report
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nhist.py	Mon Mar 09 17:39:38 2020 +0000
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+# histogram counts of numeric input, uses existing counts if given
+# Usage: nhist.py [-c] [-p] [binwidth] [pointCol countCol]
+# Default binwidth is 100
+import sys
+bins={}
+minv=sys.maxint
+maxv=-sys.maxint-1
+cum=False
+percent=False
+while len(sys.argv)>1:
+  if sys.argv[1]=='-c':
+    sys.argv.pop(1)
+    cum=True
+  elif sys.argv[1]=='-p':
+    sys.argv.pop(1)
+    cum=True
+    percent=True
+    tot=0
+  else:
+    break
+if len(sys.argv)>1:
+  w=int(sys.argv[1])
+else:
+  w=100
+if len(sys.argv)>2:
+  pc=int(sys.argv[2])
+  cc=int(sys.argv[3])
+  counts=True
+else:
+  counts=False
+for l in sys.stdin:
+  if counts:
+    ff=l.split()
+    n=int(ff[pc])
+    c=int(ff[cc])
+  else:
+    n=int(l)
+    c=1
+  v=n/w
+  if percent:
+    tot+=c
+  bins[v]=bins.get(v,0)+c
+  if n<minv:
+    minv=min
+  if n>maxv:
+    maxv=max
+if cum:
+  cumTot=0
+for k in sorted(bins.keys()):
+  if cum:
+    cumTot+=bins[k]
+  print k,bins[k],
+  if cum:
+    print cumTot,
+  if percent:
+    print "%5.2f"%(float(cumTot)*100/tot)
+  else:
+    print
+
+