-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhmmviterbiMy.m
158 lines (138 loc) · 4.85 KB
/
hmmviterbiMy.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
function [currentState, logP] = hmmviterbiMy(seq,tr,e,varargin)
%HMMVITERBI calculates the most probable state path for a sequence.
% STATES = HMMVITERBI(SEQ,TRANSITIONS,EMISSIONS) given a sequence, SEQ,
% calculates the most likely path through the Hidden Markov Model
% specified by transition probability matrix, TRANSITIONS, and emission
% probability matrix, EMISSIONS. TRANSITIONS(I,J) is the probability of
% transition from state I to state J. EMISSIONS(K,L) is the probability
% that symbol L is emitted from state K.
%
% HMMVITERBI(...,'SYMBOLS',SYMBOLS) allows you to specify the symbols
% that are emitted. SYMBOLS can be a numeric array or a cell array of the
% names of the symbols. The default symbols are integers 1 through N,
% where N is the number of possible emissions.
%
% HMMVITERBI(...,'STATENAMES',STATENAMES) allows you to specify the
% names of the states. STATENAMES can be a numeric array or a cell array
% of the names of the states. The default statenames are 1 through M,
% where M is the number of states.
%
% This function always starts the model in state 1 and then makes a
% transition to the first step using the probabilities in the first row
% of the transition matrix. So in the example given below, the first
% element of the output states will be 1 with probability 0.95 and 2 with
% probability .05.
%
% Examples:
%
% tr = [0.95,0.05;
% 0.10,0.90];
%
% e = [1/6, 1/6, 1/6, 1/6, 1/6, 1/6;
% 1/10, 1/10, 1/10, 1/10, 1/10, 1/2;];
%
% [seq, states] = hmmgenerate(100,tr,e);
% estimatedStates = hmmviterbi(seq,tr,e);
%
% [seq, states] = hmmgenerate(100,tr,e,'Statenames',{'fair';'loaded'});
% estimatesStates = hmmviterbi(seq,tr,e,'Statenames',{'fair';'loaded'});
%
% See also HMMGENERATE, HMMDECODE, HMMESTIMATE, HMMTRAIN.
% Reference: Biological Sequence Analysis, Durbin, Eddy, Krogh, and
% Mitchison, Cambridge University Press, 1998.
% Copyright 1993-2011 The MathWorks, Inc.
% tr must be square
numStates = size(tr,1);
checkTr = size(tr,2);
if checkTr ~= numStates
error(message('stats:hmmviterbi:BadTransitions'));
end
% number of rows of e must be same as number of states
checkE = size(e,1);
if checkE ~= numStates
error(message('stats:hmmviterbi:InputSizeMismatch'));
end
numEmissions = size(e,2);
customStatenames = false;
% deal with options
if nargin > 3
okargs = {'symbols','statenames'};
[symbols,statenames] = ...
internal.stats.parseArgs(okargs, {[] []}, varargin{:});
if ~isempty(symbols)
numSymbolNames = numel(symbols);
if ~isvector(symbols) || numSymbolNames ~= numEmissions
error(message('stats:hmmviterbi:BadSymbols'));
end
[~, seq] = ismember(seq,symbols);
if any(seq(:)==0)
error(message('stats:hmmviterbi:MissingSymbol'));
end
end
if ~isempty(statenames)
numStateNames = length(statenames);
if numStateNames ~= numStates
error(message('stats:hmmviterbi:BadStateNames'));
end
customStatenames = true;
end
end
% work in log space to avoid numerical issues
L = length(seq);
% if any(seq(:)<1) || any(seq(:)~=round(seq(:))) || any(seq(:)>numEmissions)
% error(message('stats:hmmviterbi:BadSequence', numEmissions));
% end
currentState = zeros(1,L);
if L == 0
return
end
logTR = log(tr);
logE = log(e);
% allocate space
pTR = zeros(numStates,L);
assumption is that model is in state 1 at step 0
v = -Inf(numStates,1);
v(1,1) = 0;
vOld = v;
% v = zeros(numStates,1);
% v(1,1) = 0;
% vOld = v;
%%logE=logE';
% loop through the model
for count = 1:L
for state = 1:numStates
% for each state we calculate
% v(state) = e(state,seq(count))* max_k(vOld(:)*tr(k,state));
bestVal = -inf;
bestPTR = 0;
% use a loop to avoid lots of calls to max
for inner = 1:numStates
val = vOld(inner) + logTR(inner,state);
if val > bestVal
bestVal = val;
bestPTR = inner;
end
end
% save the best transition information for later backtracking
pTR(state,count) = bestPTR;
% update v
% v(state) = logE(state,seq(count)) + bestVal;
v(state) = logE(state,count) + bestVal;
% v(state) = logE(state,count);
%v(state) = logE(seq(count),state) + bestVal;
end
vOld = v;
end
% decide which of the final states is post probable
[logP, finalState] = max(v);
% Now back trace through the model
currentState(L) = finalState;
% for count = L-1:-1:1
% currentState(count) = pTR(currentState(count+1),count+1);
% if currentState(count) == 0
% error(message('stats:hmmviterbi:ZeroTransitionProbability', currentState( count + 1 )));
% end
% end
% if customStatenames
% currentState = reshape(statenames(currentState),1,L);
% end