latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 /*---------------------------------------------------------------- 00024 * project ....: LTI Digital Image/Signal Processing Library 00025 * file .......: ltiHmmViterbiPathSearch.h 00026 * authors ....: Benjamin Winkler 00027 * organization: LTI, RWTH Aachen 00028 * creation ...: 12.9.2001 00029 * revisions ..: $Id: ltiHmmViterbiPathSearch.h,v 1.6 2007/01/10 02:26:18 alvarado Exp $ 00030 */ 00031 00032 #ifndef _LTI_HMM_VITERBI_PATH_SEARCH_H_ 00033 #define _LTI_HMM_VITERBI_PATH_SEARCH_H_ 00034 00035 #include "ltiSequence.h" 00036 #include "ltiVector.h" 00037 #include "ltiMatrix.h" 00038 #include "ltiHiddenMarkovModel.h" 00039 00040 namespace lti { 00041 00042 /** 00043 * This class finds and evaluates the best path through a Hidden Markov Model 00044 * (HMM) for a given observation sequence \f$\mathbf{O}\f$. The best path is 00045 * found with the Viterbi algorithm. The algorithm is taken from "Fundamentals 00046 * of speech recognition" (L. Rabiner and B.-H. Juang, Prentice Hall, 1993) and 00047 * can also be found in "Erkennung kontinuierlicher Gebärdensprache mit Ganzwortmodellen" 00048 * (H. C. Hienz, PhD thesis, Chair of Technical Computer Science, 2000). 00049 * The notation in this documentation is taken from the latter literature. 00050 * 00051 * Increased computational efficiency and precision are achieved by using 00052 * scores with \f$Score(\mathbf{X}|\lambda):=-\ln P^*(\mathbf{X}|\lambda)\f$ 00053 * instead of probabilities. Note that the lowest score corresponds to the 00054 * highest probability and thus represents the best recognition result. 00055 * 00056 * <b>Viterbi algorithm:</b> 00057 * <dl> 00058 * <dt>Preprocessing:</dt> 00059 * <dd>\f$\tilde{\pi}_i=-\ln(\pi_i)\f$</dd> 00060 * <dd>\f$\tilde{b}_j(O_t)=-\ln(b_j(O_t))\f$</dd> 00061 * <dd>\f$\tilde{a}_{ij}=-\ln(a_{ij})\f$</dd> 00062 * <dd>\f$1\leq t\leq T$, $1\leq j\leq N\f$</dd> 00063 * <dt>Initialisation:</dt> 00064 * <dd>\f$\tilde{\delta}_1(j)=\tilde{\pi}_i + \tilde{b}_j(O_1)\f$</dd> 00065 * <dd>\f$\psi_1(j)=0\f$</dd> 00066 * <dd>\f$1\leq j\leq N\f$</dd> 00067 * <dt>Recursion:</dt> 00068 * <dd>\f$\tilde{\delta}_t(j)=\displaystyle\min_{i}\left\{ 00069 * \tilde{\delta}_{t-1} + \tilde{a}_{ij} 00070 * \right\} + \tilde{b}_j(O_t)\f$</dd> 00071 * <dd>\f$\psi_t(j)=argmin_{i}\left\{\tilde{\delta}_{t-1} + 00072 * \tilde{a}_{ij}\right\}\f$</dd> 00073 * <dd>\f$2\leq t\leq T$, $1\leq j\leq N\f$</dd> 00074 * <dt>Termination:</dt> 00075 * <dd>\f$Score(\mathbf{X}|\lambda) = -\ln P^*(\mathbf{X}|\lambda) = 00076 * \displaystyle\min_{j}\left\{\tilde{\delta}_T(j)\right\}\f$</dd> 00077 * <dd>\f$q^*_T=argmin_{j}\left\{\tilde{\delta}_T(j)\right\}\f$</dd> 00078 * <dd>\f$1\leq j\leq N\f$</dd> 00079 * <dt>Backtracking:</dt> 00080 * <dd>\f$q^*_t=\psi_{t+1}(q^*_{t+1})\f$</dd> 00081 * <dd>\f$1\leq t<T\f$</dd> 00082 * </dl> 00083 * 00084 * <dl> 00085 * <dt>The score of the emission probability \f$b_j(\mathbf{x})\f$ is 00086 * approximated by the centre of the distribution, which provides the best 00087 * estimation of the observation, as follows:</dt> 00088 * <dd>\f$-\ln(b_j(\mathbf{x})) \approx \displaystyle\min_{m} \left( 00089 * -\ln c_{jm}-\ln b_{jm}(\mathbf{x})\right)\f$.</dd> 00090 * <dt>Thereby, the score \f$-\ln c_{jm}\f$ denotes the weight scores of the 00091 * mixture densities \f$b_j(\mathbf{x})\f$. 00092 * All components \f$b_{jm}(\mathbf{x})\f$ of \f$b_j(\mathbf{x})\f$ are 00093 * modelled by either laplace or gaussian distributions.</dt> 00094 * <dt>Laplace:</dt> 00095 * <dd>\f$-\ln(b_{jm}(\mathbf{x})) = \sum\limits_{k=1}^d \left\{ \left| 00096 * \frac{x_k - \mu_{jkm}} {\sigma_{jkm}} \right| + ln(2\sigma_{jkm}) 00097 * \right\}\f$</dd> 00098 * <dt>Gaussian:</dt> 00099 * <dd>\f$-\ln(b_{jm}(\mathbf{x})) = \frac{1}{2} \cdot \sum\limits_{k=1}^d 00100 * \left\{ \left( \frac{x_k - \mu_{jkm}} {\sigma_{jkm}} \right)^2 + 00101 * ln(2\pi\sigma_{jkm}^2) \right\}\f$</dd> 00102 * </dl> 00103 * 00104 * An alternative notation for the score of the best path 00105 * \f$\mathbf{q}^*\f$ is 00106 * \f$Score(\mathbf{X}|\lambda)=-\sum^{T-1}_{t=1}{\ln(a_{q_tq_{t-1}})}- 00107 * \sum^{T-1}_{t=1}{\ln(b_{q_t}(\mathbf{x}_t))}\f$. 00108 * 00109 * <b>Note</b>: Each state sequence \f$\mathbf{q}\f$ always starts in the 00110 * first state \f$s_1\f$ and ends in the last state \f$s_N\f$, whereas 00111 * \f$N\f$ denotes the number of states \f$s_j\f$. 00112 * Thus, the termination of the Viterbi algorithm is implemented as 00113 * \f$Score(\mathbf{X}|\lambda)=\tilde{\delta}_T(N)\f$. 00114 * 00115 * <b>Note</b>: The preprocessing values \f$\tilde{\pi}_i\f$, 00116 * \f$\tilde{b}_j(\mathbf{x})\f$ and \f$\tilde{a}_{ij}\f$ are usually 00117 * estimated over a given data set by an automatic training algorithm 00118 * (see hmmTrainer). 00119 * 00120 */ 00121 class hmmViterbiPathSearch { 00122 public: 00123 00124 /** 00125 * type of an observation sequence 00126 */ 00127 typedef sequence<dvector> observationSequence; 00128 00129 /** 00130 * default constructor 00131 */ 00132 hmmViterbiPathSearch(); 00133 00134 /** 00135 * copy constructor 00136 * @param other the object to be copied 00137 */ 00138 hmmViterbiPathSearch(const hmmViterbiPathSearch& other); 00139 00140 /** 00141 * destructor 00142 */ 00143 virtual ~hmmViterbiPathSearch(); 00144 00145 /** 00146 * returns the name of this type ("hmmViterbiPathSearch") 00147 */ 00148 virtual const char* getTypeName() const; 00149 00150 /** 00151 * copy data of "other" functor. 00152 * @param other the functor to be copied 00153 * @return a reference to this functor object 00154 */ 00155 hmmViterbiPathSearch& copy(const hmmViterbiPathSearch& other); 00156 00157 00158 00159 /** 00160 * Performs the Viterbi path search on a given (observed) data sequence and 00161 * returns its score. 00162 * @param model the Hidden Markov Model. 00163 * @param srcData contains the sequence of observed vectors. 00164 * @result score of the best path through the model for the given sequence. 00165 */ 00166 double apply(const hiddenMarkovModel &model, const observationSequence &srcData); 00167 00168 /** 00169 * Calculates score along a given path for a given (observed) data sequence. 00170 * @param model the Hidden Markov Model. 00171 * @param srcData contains the sequence of observed vectors. 00172 * @param path defines the applicable state sequence. 00173 * @result score of the given path. 00174 */ 00175 double evaluatePath(const hiddenMarkovModel &model, const observationSequence &srcData, const ivector &path); 00176 00177 /** 00178 * Calculates score along a given path for a given (observed) data sequence, 00179 * and additionally selects the specified density centers of the mixture densities 00180 * in the visited path. 00181 * @param model the hidden markov model 00182 * @param srcData contains the data sequence. 00183 * @param path defines the applicable state sequence. 00184 * @param densityPath contains the applicable mixture density center selection. 00185 * @result score of the given path. 00186 */ 00187 double evaluatePath(const hiddenMarkovModel &model, const observationSequence &srcData, 00188 const ivector &path, const ivector &densityPath); 00189 00190 /** 00191 * Find out lowest possible score for the given model and a sequence of the specified length. 00192 * @param model the hidden markov model 00193 * @param pathLength length of the sequence 00194 * @return minimum score 00195 */ 00196 double minimumPathScore(const hiddenMarkovModel &model, int pathLength) const; 00197 00198 00199 /** 00200 * find out lowest possible score of the given model for a given path. 00201 * @param model the hidden markov model 00202 * @param stateSelection the state selection along the given path 00203 * @param densitySelection the density selection along the given path. if this is empty, the best densities are chosen. 00204 * @return the minimum score of the given path. 00205 */ 00206 double minimumPathScore(const hiddenMarkovModel &model, const ivector &stateSelection, const ivector &densitySelection = ivector()) const; 00207 00208 /** @name State selection logging 00209 * These methods can be used to enable/disable logging and access the selection of 00210 * states \f$s_j\f$ during a call of apply(). 00211 * Default is "disabled", for efficiency reasons. Enable only if necessary. 00212 */ 00213 //@{ 00214 00215 /** 00216 * Turns logging of state selection on and off. 00217 * This enables to review the state matrix and the path leading to the minimal score. 00218 * 00219 * By default, this is turned off for efficiency reasons (memory). 00220 */ 00221 void logStateSelection(bool enable = true); 00222 00223 /** 00224 * @return matrix of the best states leading to the minimal score. 00225 * If logging of state selection is turned off or a given path was evaluated, the matrix will be empty. 00226 */ 00227 const matrix<int>& getBackpointerMatrix() const; 00228 00229 /** 00230 * @return chosen path of the last run leading to the minimal score 00231 * If logging of state selection is turned off or a given path was evaluated, the vector will be empty. 00232 */ 00233 const vector<int>& getStatePath(); 00234 00235 //@} 00236 00237 00238 /** @name Density selection logging 00239 * These methods can be used to enable/disable logging and access the selection of 00240 * densities during a call of apply(). Density selection stands for the centre \f$m\f$ 00241 * of the distribution \f$b_j(\mathbf{x})\f$, which provides the best estimation of 00242 * the observation and is thus selected during the Viterbi path search. 00243 * Default is "disabled", for efficiency reasons. Enable only if necessary. 00244 */ 00245 //@{ 00246 00247 /** 00248 * Turns logging of density selection matrix on and off. 00249 * This enables to review the densities selected during evaluation of a sequence. 00250 * 00251 * By default, this is turned off for efficiency reasons (memory). 00252 */ 00253 void logDensitySelection(bool enable = true); 00254 00255 /** 00256 * @return matrix of the densities used in the last pass. 00257 * If logging of density selection is turned off or a given path was evaluated, the matrix will be empty. 00258 */ 00259 const matrix<int>& getDensitySelectionMatrix() const; 00260 00261 /** 00262 * @return densities selected for the chosen path. 00263 * If logging of density selection is turned off, the vector will be empty. 00264 */ 00265 const vector<int>& getDensitySelectionPath(); 00266 00267 //@} 00268 00269 00270 /** @name Emission score logging 00271 * These methods can be used to enable/disable logging and access the emission 00272 * scores \f$-\ln(b_j(\mathbf{x}))\f$ during a call of apply(). 00273 * Default is "disabled", for efficiency reasons. Enable only if necessary. 00274 */ 00275 //@{ 00276 00277 /** 00278 * Turns logging of emission score matrix on and off. 00279 * This enables to analyse the emission scores generated for each step and state 00280 * 00281 * By default, this is turned off for efficiency reasons (runtime and memory). 00282 */ 00283 void logEmissionScore(bool enable = true); 00284 00285 /** 00286 * @return matrix of the emission scores generated in the last pass. 00287 * If logging of emission scores is turned off or a given path was evaluated, the matrix will be empty. 00288 */ 00289 const matrix<double>& getEmissionScoreMatrix() const; 00290 00291 /** 00292 * @return emission scores generated for the chosen path. 00293 * If logging of emission score is turned off, the vector will be empty. 00294 */ 00295 const vector<double>& getEmissionScorePath(); 00296 00297 //@} 00298 00299 /** 00300 * @name Routines for online classification 00301 */ 00302 //@{ 00303 00304 /** 00305 * perform initial transition and add emission score, i.e. initialisation of the 00306 * Viterbi algorithm 00307 * @return last entry of result vector 00308 */ 00309 double initialStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result) const; 00310 00311 /** 00312 * perform subsequent transition and add emission score, i.e. one iteration of the 00313 * Viterbi algorithm recursion 00314 * @return last entry of result vector 00315 */ 00316 double subsequentStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result) const; 00317 00318 00319 //@} 00320 00321 00322 private: 00323 00324 00325 /** 00326 * clear logged data 00327 */ 00328 void reset(); 00329 00330 00331 /** 00332 * add initial transition vector to result 00333 * adding is done to allow for later implementation of observation chain recognition 00334 */ 00335 void initialTransition(const hiddenMarkovModel &model, dvector &result) const; 00336 00337 /** 00338 * calculate best transitions 00339 */ 00340 void subsequentTransition(const hiddenMarkovModel &model, dvector &result, ivector &predecessors) const; 00341 00342 /** 00343 * calculate emission score for given frame and add it to the result vector 00344 */ 00345 void addEmissionScore(const hiddenMarkovModel &model, const dvector &frame, 00346 dvector &result, ivector &densitySelection, dvector &scoreSelection) const; 00347 00348 /** 00349 * perform initial transition and add emission score 00350 * @return last entry of result vector 00351 */ 00352 double initialStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result, 00353 ivector &densitySelection, dvector &scoreSelection) const; 00354 00355 /** 00356 * perform subsequent transition and add emission score 00357 * @return last entry of result vector 00358 */ 00359 double subsequentStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result, 00360 ivector &stateSelection, ivector &densitySelection, dvector &scoreSelection) const; 00361 00362 // score functions 00363 00364 /** 00365 * calculate score for given frame and density 00366 */ 00367 double getDensityScore(const hiddenMarkovModel &model, const dvector &frame, const hiddenMarkovModel::singleDensity &dens) const; 00368 00369 /** 00370 * calculate score for given frame and state, return best density 00371 */ 00372 double getStateScoreAndDensity(const hiddenMarkovModel &model, const dvector &frame, const hiddenMarkovModel::mixtureDensity &state, int &dens) const; 00373 00374 /** 00375 * gaussian Score (i.e. -ln(N(mean, variance)), where N is the normal distribution) 00376 */ 00377 double gaussScore(const dvector &frame, const dvector &mean, 00378 const dvector &variance, const dvector &featureWeights) const; 00379 00380 /** 00381 * laplace Score (i.e. -ln(L(mean, variance)), where L is the laplace distribution) 00382 */ 00383 double laplaceScore(const dvector &frame, const dvector &mean, 00384 const dvector &variance, const dvector &featureWeights) const; 00385 00386 /** 00387 * defines, whether state selection is logged 00388 */ 00389 bool registerStateSelection; 00390 00391 /** 00392 * backtrace matrix will be saved here 00393 */ 00394 matrix<int> stateMatrix; 00395 00396 /** 00397 * best path will be generated by getStatePath and then saved here 00398 */ 00399 vector<int> statePath; 00400 00401 00402 /** 00403 * defines, whether density selection is logged 00404 */ 00405 bool registerDensitySelection; 00406 00407 /** 00408 * density selection matrix will be saved here 00409 */ 00410 matrix<int> densityMatrix; 00411 00412 /** 00413 * density selection path will be generated by getDensityPath and then saved here 00414 */ 00415 vector<int> densityPath; 00416 00417 00418 /** 00419 * defines, whether emission scores are logged 00420 */ 00421 bool registerEmissionScore; 00422 00423 /** 00424 * emission score matrix will be saved here 00425 */ 00426 matrix<double> emissionScoreMatrix; 00427 00428 /** 00429 * emission score path will be generated by getEmissionScorePath and then saved here 00430 */ 00431 vector<double> emissionScorePath; 00432 00433 00434 // friend class hmmTrainer; 00435 00436 private: 00437 00438 int stateCount; 00439 00440 }; 00441 00442 } 00443 00444 #endif