Mercurial > hg > python
comparison hmm/tinySup.py @ 3:26d9c0308fcf
updated/added from ecclerig version
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 09 Mar 2020 17:35:28 +0000 |
parents | e07789816ca5 |
children |
comparison
equal
deleted
inserted
replaced
2:e07789816ca5 | 3:26d9c0308fcf |
---|---|
10 sents=[[('<s>','<s>'),('the','D'),('sheep','N'),('run','V'),('</s>','</s>')], | 10 sents=[[('<s>','<s>'),('the','D'),('sheep','N'),('run','V'),('</s>','</s>')], |
11 [('<s>','<s>'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')], | 11 [('<s>','<s>'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')], |
12 [('<s>','<s>'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')]] | 12 [('<s>','<s>'),('run','V'),('the','D'),('sheep','N'),('</s>','</s>')]] |
13 | 13 |
14 taglists=[('<s>',[('<s>',1),('the',0),('sheep',0),('run',0),('</s>',0)]), | 14 taglists=[('<s>',[('<s>',1),('the',0),('sheep',0),('run',0),('</s>',0)]), |
15 ('D',[('the',1),('sheep',0),('run',0),('<s>',0),('</s>',0)]), | 15 ('D',[('the',.8),('sheep',.1),('run',.1),('<s>',0),('</s>',0)]), |
16 ('N',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]), | 16 ('N',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]), |
17 ('V',[('the',0),('sheep',.5),('run',.5),('<s>',0),('</s>',0)]), | 17 ('V',[('the',.2),('sheep',.4),('run',.4),('<s>',0),('</s>',0)]), |
18 ('</s>',[('<s>',0),('the',0),('sheep',0),('run',0),('</s>',1)])] | 18 ('</s>',[('<s>',0),('the',0),('sheep',0),('run',0),('</s>',1)])] |
19 | 19 |
20 tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists) | 20 tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists) |
21 | 21 |
22 priors = MLEProbDist(FreqDist({'<s>':1, | 22 priors = MLEProbDist(FreqDist({'<s>':1, |
46 nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols) | 46 nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols) |
47 | 47 |
48 # Note that contrary to naive reading of the documentation, | 48 # Note that contrary to naive reading of the documentation, |
49 # train_unsupervised expects a sequence of sequences of word/tag pairs, | 49 # train_unsupervised expects a sequence of sequences of word/tag pairs, |
50 # it just ignores the tags | 50 # it just ignores the tags |
51 nnm=nm.train_unsupervised(sents,model=model,max_iterations=10,updateOutputs=False) | 51 nnm=nm.train_unsupervised(sents,model=model,max_iterations=15,updateOutputs=False) |
52 | 52 |
53 for tag in tagset: | 53 for tag in tagset: |
54 if tag=='</s>': | 54 if tag=='</s>': |
55 break | 55 break |
56 cp=nnm._transitions[tag] | 56 cp=nnm._transitions[tag] |