Mercurial > hg > python
changeset 5:bd1db1ed4c25
found on ecclerig
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 09 Mar 2020 17:39:38 +0000 |
parents | 2d7c91f89f6b |
children | a56d5285575b b1ec44d254c6 1670a33e3e6d |
files | lazyBug.py nhist.py |
diffstat | 2 files changed, 76 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# File: lazyBug.py
'''Illustrate problem with writable cache in LazySubsequence'''

# Minimal reproduction script: the statements below are order-sensitive,
# because every `train_data[0]` is a fresh lookup on the sliced corpus view.

from nltk.corpus import brown
import nltk, sys

# Tagged sentences of the Brown corpus 'news' category, universal tagset.
data = brown.tagged_sents(categories='news', tagset='universal')
# NOTE(review): presumably this slice is the LazySubsequence named in the
# docstring -- confirm against the installed nltk version.
train_data=data[:1000]
# Keep a reference to the object returned by the first lookup of item 0.
y=train_data[0]
# Baseline: compare the saved object with a new lookup, show its first 2 items.
print('initial:',y==train_data[0],train_data[0][:2])
# Mutate whatever object a lookup of item 0 returns, in place.
train_data[0].insert(0,('<s>','<s>'))
# Same comparison/preview immediately after the in-place insert.
print('modified:',y==train_data[0],train_data[0][:2])
# Walk the whole of train_data once; only the traversal matters, z is unused.
z=len([s for s in train_data])
# Same comparison/preview after the full traversal.
# NOTE(review): presumably this line differs from the 'modified:' line,
# which would be the bug being illustrated -- confirm by running.
print('post-view:',y==train_data[0],train_data[0][:2])

# Report the library and interpreter versions the result was observed with.
print("\nnltk: %s\npython: %s"%(nltk.version_info,sys.version))
#!/usr/bin/python
# File: nhist.py
# histogram counts of numeric input, uses existing counts if given
# Usage: nhist.py [-c] [-p] [binwidth] [pointCol countCol]
#   -c  add a cumulative-count column
#   -p  as -c, and also add a cumulative-percentage column
# Default binwidth is 100
#
# Written to run unchanged under both Python 2 and Python 3: no print
# statement, no sys.maxint, and explicit floor division.
import sys

USAGE = 'Usage: nhist.py [-c] [-p] [binwidth] [pointCol countCol]'


def _parse_args(argv):
    """Parse the command line.

    argv is a full argument vector (argv[0] is the program name).
    Returns (cum, percent, width, cols); cols is None when every input
    line is a single integer, otherwise a (pointCol, countCol) pair of
    0-based indices into the whitespace-separated fields of each line.
    Raises SystemExit with a usage message on an unusable command line.
    """
    args = list(argv[1:])
    cum = percent = False
    # Leading flags only; the first non-flag argument ends flag parsing.
    while args and args[0] in ('-c', '-p'):
        if args.pop(0) == '-p':
            percent = True
        cum = True  # -p implies the cumulative column as well
    width = int(args[0]) if args else 100
    if width == 0:
        raise SystemExit('binwidth must be non-zero\n' + USAGE)
    cols = None
    if len(args) > 1:
        if len(args) < 3:
            # pointCol without countCol used to die with a bare IndexError.
            raise SystemExit('pointCol and countCol must be given together\n'
                             + USAGE)
        cols = (int(args[1]), int(args[2]))
    return cum, percent, width, cols


def _bin_counts(lines, width, cols=None):
    """Accumulate per-bin counts from an iterable of text lines.

    Returns (bins, total, lo, hi): bins maps bin index (value // width)
    to its summed count, total is the sum of all counts, and lo/hi are
    the smallest/largest values seen (None if there was no input).
    Lines containing only whitespace are skipped.
    """
    bins = {}
    total = 0
    lo = hi = None
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if cols is None:
            n, c = int(line), 1
        else:
            n, c = int(fields[cols[0]]), int(fields[cols[1]])
        # Floor division: identical to Python 2's int/int, and keeps bin
        # indices integral on Python 3 too.
        v = n // width
        total += c
        bins[v] = bins.get(v, 0) + c
        # The original assigned the builtins min/max here instead of n.
        if lo is None or n < lo:
            lo = n
        if hi is None or n > hi:
            hi = n
    return bins, total, lo, hi


def main(argv=None, stdin=None, stdout=None):
    """Read values from stdin and write one 'bin count ...' row per bin.

    argv, stdin and stdout default to the corresponding sys attributes;
    they are parameters only so the script can be driven without a
    subprocess.  Rows are written in ascending bin order as
    'bin count', plus the cumulative count with -c/-p, plus the
    cumulative percentage formatted with %5.2f with -p.
    """
    if argv is None:
        argv = sys.argv
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    cum, percent, width, cols = _parse_args(argv)
    # The value range is tracked but, as in the original, not reported.
    bins, total, _lo, _hi = _bin_counts(stdin, width, cols)
    running = 0
    for k in sorted(bins):
        running += bins[k]
        row = [str(k), str(bins[k])]
        if cum:
            row.append(str(running))
        if percent:
            # All-zero counts would otherwise divide by zero.
            pct = float(running) * 100 / total if total else 0.0
            row.append("%5.2f" % pct)
        stdout.write(" ".join(row) + "\n")


if __name__ == '__main__':
    main()