python: hmm/tinySup.py comparison

updated/added from ecclerig version

author	Henry S. Thompson <ht@inf.ed.ac.uk>
date	Mon, 09 Mar 2020 17:35:28 +0000
parents	e07789816ca5
children

comparison

equal deleted inserted replaced

-:e07789816ca5
+:26d9c0308fcf
 sents=[[('<s>','<s>'),('the','D'),('sheep','N'),('run','V'),('</s>','</s>')],
 [('<s>','<s>'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')],
 [('<s>','<s>'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')]]
 taglists=[('<s>',[('<s>',1),('the',0),('sheep',0),('run',0),('</s>',0)]),
-('D',[('the',1),('sheep',0),('run',0),('<s>',0),('</s>',0)]),
+('D',[('the',.8),('sheep',.1),('run',.1),('<s>',0),('</s>',0)]),
-('N',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]),
+('N',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]),
-('V',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]),
+('V',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]),
 ('</s>',[('<s>',0),('the',0),('sheep',0),('run',0),('</s>',1)])]
 tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists)
 priors = MLEProbDist(FreqDist({'<s>':1,
 nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols)
 # Note that, contrary to a naive reading of the documentation,
 #  train_unsupervised expects a sequence of sequences of word/tag pairs;
 #  it just ignores the tags.
-nnm=nm.train_unsupervised(sents,model=model,max_iterations=10,updateOutputs=False)
+nnm=nm.train_unsupervised(sents,model=model,max_iterations=15,updateOutputs=False)
 for tag in tagset:
 if tag=='</s>':
 break
 cp=nnm._transitions[tag]

Mercurial > hg > python