# HG changeset patch
# User Henry S. Thompson
# Date 1583775578 0
# Node ID bd1db1ed4c2514e1553f32d2c0ef7f4b1a077428
# Parent  2d7c91f89f6bd77f1fc0b1077b526ec76a1364e4
found on ecclerig

diff -r 2d7c91f89f6b -r bd1db1ed4c25 lazyBug.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lazyBug.py	Mon Mar 09 17:39:38 2020 +0000
@@ -0,0 +1,15 @@
+'''Illustrate problem with writable cache in LazySubsequence'''
+
+from nltk.corpus import brown
+import nltk, sys
+
+data = brown.tagged_sents(categories='news', tagset='universal')
+train_data=data[:1000]
+y=train_data[0]
+print('initial:',y==train_data[0],train_data[0][:2])
+train_data[0].insert(0,('',''))
+print('modified:',y==train_data[0],train_data[0][:2])
+z=len([s for s in train_data])
+print('post-view:',y==train_data[0],train_data[0][:2])
+
+print("\nnltk: %s\npython: %s"%(nltk.version_info,sys.version))
diff -r 2d7c91f89f6b -r bd1db1ed4c25 nhist.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nhist.py	Mon Mar 09 17:39:38 2020 +0000
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+# histogram counts of numeric input, uses existing counts if given
+# Usage: nhist.py [-c] [-p] [binwidth] [pointCol countCol]
+# Default binwidth is 100
+import sys
+bins={}
+minv=sys.maxint
+maxv=-sys.maxint-1
+cum=False
+percent=False
+while len(sys.argv)>1:
+  if sys.argv[1]=='-c':
+    sys.argv.pop(1)
+    cum=True
+  elif sys.argv[1]=='-p':
+    sys.argv.pop(1)
+    cum=True
+    percent=True
+    tot=0
+  else:
+    break
+if len(sys.argv)>1:
+  w=int(sys.argv[1])
+else:
+  w=100
+if len(sys.argv)>2:
+  pc=int(sys.argv[2])
+  cc=int(sys.argv[3])
+  counts=True
+else:
+  counts=False
+for l in sys.stdin:
+  if counts:
+    ff=l.split()
+    n=int(ff[pc])
+    c=int(ff[cc])
+  else:
+    n=int(l)
+    c=1
+  v=n/w
+  if percent:
+    tot+=c
+  bins[v]=bins.get(v,0)+c
+  if n<minv:
+    minv=n
+  if n>maxv:
+    maxv=n
+if cum:
+  cumTot=0
+for k in sorted(bins.keys()):
+  if cum:
+    cumTot+=bins[k]
+  print k,bins[k],
+  if cum:
+    print cumTot,
+  if percent:
+    print "%5.2f"%(float(cumTot)*100/tot)
+  else:
+    print
+
+