comparison hmm/tinySup.py @ 3:26d9c0308fcf

updated/added from ecclerig version
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 09 Mar 2020 17:35:28 +0000
parents e07789816ca5
children
comparison
equal deleted inserted replaced
2:e07789816ca5 3:26d9c0308fcf
10 sents=[[('<s>','<s>'),('the','D'),('sheep','N'),('run','V'),('</s>','</s>')], 10 sents=[[('<s>','<s>'),('the','D'),('sheep','N'),('run','V'),('</s>','</s>')],
11 [('<s>','<s>'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')], 11 [('<s>','<s>'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')],
12 [('<s>','<s>'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')]] 12 [('<s>','<s>'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')]]
13 13
14 taglists=[('<s>',[('<s>',1),('the',0),('sheep',0),('run',0),('</s>',0)]), 14 taglists=[('<s>',[('<s>',1),('the',0),('sheep',0),('run',0),('</s>',0)]),
15 ('D',[('the',1),('sheep',0),('run',0),('<s>',0),('</s>',0)]), 15 ('D',[('the',.8),('sheep',.1),('run',.1),('<s>',0),('</s>',0)]),
16 ('N',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]), 16 ('N',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]),
17 ('V',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]), 17 ('V',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]),
18 ('</s>',[('<s>',0),('the',0),('sheep',0),('run',0),('</s>',1)])] 18 ('</s>',[('<s>',0),('the',0),('sheep',0),('run',0),('</s>',1)])]
19 19
20 tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists) 20 tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists)
21 21
22 priors = MLEProbDist(FreqDist({'<s>':1, 22 priors = MLEProbDist(FreqDist({'<s>':1,
46 nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols) 46 nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols)
47 47
48 # Note that, contrary to a naive reading of the documentation, 48 # Note that, contrary to a naive reading of the documentation,
49 # train_unsupervised expects a sequence of sequences of word/tag pairs; 49 # train_unsupervised expects a sequence of sequences of word/tag pairs;
50 # it just ignores the tags. 50 # it just ignores the tags.
51 nnm=nm.train_unsupervised(sents,model=model,max_iterations=10,updateOutputs=False) 51 nnm=nm.train_unsupervised(sents,model=model,max_iterations=15,updateOutputs=False)
52 52
53 for tag in tagset: 53 for tag in tagset:
54 if tag=='</s>': 54 if tag=='</s>':
55 break 55 break
56 cp=nnm._transitions[tag] 56 cp=nnm._transitions[tag]