ltiHmmViterbiPathSearch.h

00001 /*
00002  * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 /*----------------------------------------------------------------
00024  * project ....: LTI Digital Image/Signal Processing Library
00025  * file .......: ltiHmmViterbiPathSearch.h
00026  * authors ....: Benjamin Winkler
00027  * organization: LTI, RWTH Aachen
00028  * creation ...: 12.9.2001
00029  * revisions ..: $Id: ltiHmmViterbiPathSearch.h,v 1.6 2007/01/10 02:26:18 alvarado Exp $
00030  */
00031 
00032 #ifndef _LTI_HMM_VITERBI_PATH_SEARCH_H_
00033 #define _LTI_HMM_VITERBI_PATH_SEARCH_H_
00034 
00035 #include "ltiSequence.h"
00036 #include "ltiVector.h"
00037 #include "ltiMatrix.h"
00038 #include "ltiHiddenMarkovModel.h"
00039 
00040 namespace lti {
00041 
00042   /**
00043    * This class finds and evaluates the best path through a Hidden Markov Model
00044    * (HMM) for a given observation sequence \f$\mathbf{O}\f$. The best path is
00045    * found with the Viterbi algorithm. The algorithm is taken from "Fundamentals
00046    * of speech recognition" (L. Rabiner and B.-H. Juang, Prentice Hall, 1993) and
00047    * can also be found in "Erkennung kontinuierlicher Geb�rdensprache mit Ganzwortmodellen"
00048    * (H. C. Hienz, PhD thesis, Chair of Technical Computer Science, 2000).
00049    * The notation in this documentation is taken from the latter literature.
00050    *
00051    * Increased computational efficiency and precision are achieved by using
00052    * scores with \f$Score(\mathbf{X}|\lambda):=-\ln P^*(\mathbf{X}|\lambda)\f$
00053    * instead of probabilities. Note that the lowest score corresponds to the
00054    * highest probability and thus represents the best recognition result.
00055    *
00056    * <b>Viterbi algorithm:</b>
00057    * <dl>
00058    * <dt>Preprocessing:</dt>
00059    * <dd>\f$\tilde{\pi}_i=-\ln(\pi_i)\f$</dd>
00060    * <dd>\f$\tilde{b}_j(O_t)=-\ln(b_j(O_t))\f$</dd>
00061    * <dd>\f$\tilde{a}_{ij}=-\ln(a_{ij})\f$</dd>
00062    * <dd>\f$1\leq t\leq T$, $1\leq j\leq N\f$</dd>
00063    * <dt>Initialisation:</dt>
00064    * <dd>\f$\tilde{\delta}_1(j)=\tilde{\pi}_i + \tilde{b}_j(O_1)\f$</dd>
00065    * <dd>\f$\psi_1(j)=0\f$</dd>
00066    * <dd>\f$1\leq j\leq N\f$</dd>
00067    * <dt>Recursion:</dt>
00068    * <dd>\f$\tilde{\delta}_t(j)=\displaystyle\min_{i}\left\{
00069    *     \tilde{\delta}_{t-1} + \tilde{a}_{ij} 
00070    *     \right\} + \tilde{b}_j(O_t)\f$</dd>
00071    * <dd>\f$\psi_t(j)=argmin_{i}\left\{\tilde{\delta}_{t-1} +
00072    * \tilde{a}_{ij}\right\}\f$</dd>
00073    * <dd>\f$2\leq t\leq T$, $1\leq j\leq N\f$</dd>
00074    * <dt>Termination:</dt>
00075    * <dd>\f$Score(\mathbf{X}|\lambda) = -\ln P^*(\mathbf{X}|\lambda) =
00076    * \displaystyle\min_{j}\left\{\tilde{\delta}_T(j)\right\}\f$</dd>
00077    * <dd>\f$q^*_T=argmin_{j}\left\{\tilde{\delta}_T(j)\right\}\f$</dd>
00078    * <dd>\f$1\leq j\leq N\f$</dd>
00079    * <dt>Backtracking:</dt>
00080    * <dd>\f$q^*_t=\psi_{t+1}(q^*_{t+1})\f$</dd>
00081    * <dd>\f$1\leq t<T\f$</dd>
00082    * </dl>
00083    *  
00084    * <dl>
00085    * <dt>The score of the emission probability \f$b_j(\mathbf{x})\f$ is
00086    * approximated by the centre of the distribution, which provides the best
00087    * estimation of the observation, as follows:</dt>
00088    * <dd>\f$-\ln(b_j(\mathbf{x})) \approx \displaystyle\min_{m} \left(
00089    * -\ln c_{jm}-\ln b_{jm}(\mathbf{x})\right)\f$.</dd>
00090    * <dt>Thereby, the score \f$-\ln c_{jm}\f$ denotes the weight scores of the
00091    * mixture densities \f$b_j(\mathbf{x})\f$. 
00092    * All components \f$b_{jm}(\mathbf{x})\f$ of \f$b_j(\mathbf{x})\f$ are
00093    * modelled by either laplace or gaussian distributions.</dt>
00094    * <dt>Laplace:</dt>
00095    * <dd>\f$-\ln(b_{jm}(\mathbf{x})) = \sum\limits_{k=1}^d \left\{ \left|
00096    * \frac{x_k - \mu_{jkm}} {\sigma_{jkm}} \right| + ln(2\sigma_{jkm})
00097    * \right\}\f$</dd>
00098    * <dt>Gaussian:</dt>
00099    * <dd>\f$-\ln(b_{jm}(\mathbf{x})) = \frac{1}{2} \cdot \sum\limits_{k=1}^d
00100    * \left\{ \left( \frac{x_k - \mu_{jkm}} {\sigma_{jkm}} \right)^2 +
00101    * ln(2\pi\sigma_{jkm}^2) \right\}\f$</dd>
00102    * </dl>
00103    *
00104    * An alternative notation for the score of the best path
00105    * \f$\mathbf{q}^*\f$ is
00106    * \f$Score(\mathbf{X}|\lambda)=-\sum^{T-1}_{t=1}{\ln(a_{q_tq_{t-1}})}-
00107    * \sum^{T-1}_{t=1}{\ln(b_{q_t}(\mathbf{x}_t))}\f$.
00108    *
00109    * <b>Note</b>: Each state sequence \f$\mathbf{q}\f$ always starts in the 
00110    * first state \f$s_1\f$ and ends in the last state \f$s_N\f$, whereas
00111    * \f$N\f$ denotes the number of states \f$s_j\f$.
00112    * Thus, the termination of the Viterbi algorithm is implemented as
00113    * \f$Score(\mathbf{X}|\lambda)=\tilde{\delta}_T(N)\f$.
00114    *
00115    * <b>Note</b>: The preprocessing values \f$\tilde{\pi}_i\f$, 
00116    * \f$\tilde{b}_j(\mathbf{x})\f$ and \f$\tilde{a}_{ij}\f$ are usually
00117    * estimated over a given data set by an automatic training algorithm 
00118    * (see hmmTrainer).
00119    * 
00120    */
00121   class hmmViterbiPathSearch {
00122   public:
00123 
00124     /**
00125      * type of an observation sequence
00126      */
00127     typedef sequence<dvector> observationSequence;
00128 
00129     /**
00130      * default constructor
00131      */
00132     hmmViterbiPathSearch();
00133 
00134     /**
00135      * copy constructor
00136      * @param other the object to be copied
00137      */
00138     hmmViterbiPathSearch(const hmmViterbiPathSearch& other);
00139 
00140     /**
00141      * destructor
00142      */
00143     virtual ~hmmViterbiPathSearch();
00144 
00145     /**
00146      * returns the name of this type ("hmmViterbiPathSearch")
00147      */
00148     virtual const char* getTypeName() const;
00149 
00150     /**
00151      * copy data of "other" functor.
00152      * @param other the functor to be copied
00153      * @return a reference to this functor object
00154      */
00155     hmmViterbiPathSearch& copy(const hmmViterbiPathSearch& other);
00156 
00157 
00158 
00159     /**
00160      * Performs the Viterbi path search on a given (observed) data sequence and
00161      * returns its score.
00162      * @param model the Hidden Markov Model.
00163      * @param srcData contains the sequence of observed vectors.
00164      * @result score of the best path through the model for the given sequence.
00165      */
00166     double apply(const hiddenMarkovModel &model, const observationSequence &srcData);
00167 
00168     /**
00169      * Calculates score along a given path for a given (observed) data sequence.
00170      * @param model the Hidden Markov Model.
00171      * @param srcData contains the sequence of observed vectors.
00172      * @param path defines the applicable state sequence.
00173      * @result score of the given path.
00174      */
00175     double evaluatePath(const hiddenMarkovModel &model, const observationSequence &srcData, const ivector &path);
00176 
00177     /**
00178      * Calculates score along a given path for a given (observed) data sequence,
00179      * and additionally selects the specified density centers of the mixture densities
00180      * in the visited path.
00181      * @param model the hidden markov model
00182      * @param srcData contains the data sequence.
00183      * @param path defines the applicable state sequence.
00184      * @param densityPath contains the applicable mixture density center selection.
00185      * @result score of the given path.
00186      */
00187     double evaluatePath(const hiddenMarkovModel &model, const observationSequence &srcData,
00188                        const ivector &path, const ivector &densityPath);
00189 
00190     /**
00191      * Find out lowest possible score for the given model and a sequence of the specified length.
00192      * @param model the hidden markov model
00193      * @param pathLength length of the sequence
00194      * @return minimum score
00195      */
00196     double minimumPathScore(const hiddenMarkovModel &model, int pathLength) const;
00197 
00198 
00199     /**
00200      * find out lowest possible score of the given model for a given path.
00201      * @param model the hidden markov model
00202      * @param stateSelection the state selection along the given path
00203      * @param densitySelection the density selection along the given path. if this is empty, the best densities are chosen.
00204      * @return the minimum score of the given path.
00205      */
00206     double minimumPathScore(const hiddenMarkovModel &model, const ivector &stateSelection, const ivector &densitySelection = ivector()) const;
00207 
00208     /** @name State selection logging
00209      *  These methods can be used to enable/disable logging and access the selection of
00210      *  states \f$s_j\f$ during a call of apply().
00211      *  Default is "disabled", for efficiency reasons. Enable only if necessary.
00212      */
00213       //@{
00214 
00215         /**
00216          * Turns logging of state selection on and off.
00217          * This enables to review the state matrix and the path leading to the minimal score.
00218          *
00219          * By default, this is turned off for efficiency reasons (memory).
00220          */
00221         void logStateSelection(bool enable = true);
00222 
00223         /**
00224          * @return matrix of the best states leading to the minimal score.
00225          * If logging of state selection is turned off or a given path was evaluated, the matrix will be empty.
00226          */
00227         const matrix<int>& getBackpointerMatrix() const;
00228 
00229         /**
00230          * @return chosen path of the last run leading to the minimal score
00231          * If logging of state selection is turned off or a given path was evaluated, the vector will be empty.
00232          */
00233         const vector<int>& getStatePath();
00234 
00235       //@}
00236 
00237 
00238     /** @name Density selection logging
00239      *  These methods can be used to enable/disable logging and access the selection of
00240      *  densities during a call of apply(). Density selection stands for the centre \f$m\f$
00241      *  of the distribution \f$b_j(\mathbf{x})\f$, which provides the best estimation of
00242      *  the observation and is thus selected during the Viterbi path search.
00243      *  Default is "disabled", for efficiency reasons. Enable only if necessary.
00244      */
00245       //@{
00246 
00247         /**
00248          * Turns logging of density selection matrix on and off.
00249          * This enables to review the densities selected during evaluation of a sequence.
00250          *
00251          * By default, this is turned off for efficiency reasons (memory).
00252          */
00253         void logDensitySelection(bool enable = true);
00254 
00255         /**
00256          * @return matrix of the densities used in the last pass.
00257          * If logging of density selection is turned off or a given path was evaluated, the matrix will be empty.
00258          */
00259         const matrix<int>& getDensitySelectionMatrix() const;
00260 
00261         /**
00262          * @return densities selected for the chosen path.
00263          * If logging of density selection is turned off, the vector will be empty.
00264          */
00265         const vector<int>& getDensitySelectionPath();
00266 
00267       //@}
00268 
00269 
00270     /** @name Emission score logging
00271      *  These methods can be used to enable/disable logging and access the emission
00272      *  scores \f$-\ln(b_j(\mathbf{x}))\f$ during a call of apply().
00273      *  Default is "disabled", for efficiency reasons. Enable only if necessary.
00274      */
00275       //@{
00276 
00277         /**
00278          * Turns logging of emission score matrix on and off.
00279          * This enables to analyse the emission scores generated for each step and state
00280          *
00281          * By default, this is turned off for efficiency reasons (runtime and memory).
00282          */
00283         void logEmissionScore(bool enable = true);
00284 
00285         /**
00286          * @return matrix of the emission scores generated in the last pass.
00287          * If logging of emission scores is turned off or a given path was evaluated, the matrix will be empty.
00288          */
00289         const matrix<double>& getEmissionScoreMatrix() const;
00290 
00291         /**
00292          * @return emission scores generated for the chosen path.
00293          * If logging of emission score is turned off, the vector will be empty.
00294          */
00295         const vector<double>& getEmissionScorePath();
00296 
00297       //@}
00298 
00299     /**
00300      * @name Routines for online classification
00301      */
00302       //@{
00303 
00304         /**
00305          * perform initial transition and add emission score, i.e. initialisation of the
00306          * Viterbi algorithm
00307          * @return last entry of result vector
00308          */
00309         double initialStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result) const;
00310 
00311         /**
00312          * perform subsequent transition and add emission score, i.e. one iteration of the
00313          * Viterbi algorithm recursion
00314          * @return last entry of result vector
00315          */
00316         double subsequentStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result) const;
00317 
00318 
00319       //@}
00320 
00321 
00322   private:
00323 
00324 
00325     /**
00326      * clear logged data
00327      */
00328     void reset();
00329 
00330 
00331     /**
00332      * add initial transition vector to result
00333      * adding is done to allow for later implementation of observation chain recognition
00334      */
00335     void initialTransition(const hiddenMarkovModel &model, dvector &result) const;
00336 
00337     /**
00338      * calculate best transitions
00339      */
00340     void subsequentTransition(const hiddenMarkovModel &model, dvector &result, ivector &predecessors) const;
00341 
00342     /**
00343      * calculate emission score for given frame and add it to the result vector
00344      */
00345     void addEmissionScore(const hiddenMarkovModel &model, const dvector &frame,
00346                           dvector &result, ivector &densitySelection, dvector &scoreSelection) const;
00347 
00348     /**
00349      * perform initial transition and add emission score
00350      * @return last entry of result vector
00351      */
00352     double initialStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result,
00353                       ivector &densitySelection, dvector &scoreSelection) const;
00354 
00355     /**
00356      * perform subsequent transition and add emission score
00357      * @return last entry of result vector
00358      */
00359     double subsequentStep(const hiddenMarkovModel &model, const dvector &frame, dvector &result,
00360                          ivector &stateSelection, ivector &densitySelection, dvector &scoreSelection) const;
00361 
00362     // score functions
00363 
00364     /**
00365      * calculate score for given frame and density
00366      */
00367     double getDensityScore(const hiddenMarkovModel &model, const dvector &frame, const hiddenMarkovModel::singleDensity &dens) const;
00368 
00369     /**
00370      * calculate score for given frame and state, return best density
00371      */
00372     double getStateScoreAndDensity(const hiddenMarkovModel &model, const dvector &frame, const hiddenMarkovModel::mixtureDensity &state, int &dens) const;
00373 
00374     /**
00375      * gaussian Score (i.e. -ln(N(mean, variance)), where N is the normal distribution)
00376      */
00377     double gaussScore(const dvector &frame, const dvector &mean,
00378                      const dvector &variance, const dvector &featureWeights) const;
00379 
00380     /**
00381      * laplace Score (i.e. -ln(L(mean, variance)), where L is the laplace distribution)
00382      */
00383     double laplaceScore(const dvector &frame, const dvector &mean,
00384                        const dvector &variance, const dvector &featureWeights) const;
00385 
00386     /**
00387      * defines, whether state selection is logged
00388      */
00389     bool registerStateSelection;
00390 
00391     /**
00392      * backtrace matrix will be saved here
00393      */
00394     matrix<int> stateMatrix;
00395 
00396     /**
00397      * best path will be generated by getStatePath and then saved here
00398      */
00399     vector<int> statePath;
00400 
00401 
00402     /**
00403      * defines, whether density selection is logged
00404      */
00405     bool registerDensitySelection;
00406 
00407     /**
00408      * density selection matrix will be saved here
00409      */
00410     matrix<int> densityMatrix;
00411 
00412     /**
00413      * density selection path will be generated by getDensityPath and then saved here
00414      */
00415     vector<int> densityPath;
00416 
00417 
00418     /**
00419      * defines, whether emission scores are logged
00420      */
00421     bool registerEmissionScore;
00422 
00423     /**
00424      * emission score matrix will be saved here
00425      */
00426     matrix<double> emissionScoreMatrix;
00427 
00428     /**
00429      * emission score path will be generated by getEmissionScorePath and then saved here
00430      */
00431     vector<double> emissionScorePath;
00432 
00433 
00434 //    friend class hmmTrainer;
00435 
00436   private:
00437 
00438     int stateCount;
00439 
00440   };
00441 
00442 }
00443 
00444 #endif