// latest version v1.9 - last update 10 Apr 2010
/*
 * Copyright (C) 2002, 2003, 2004, 2005, 2006
 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
 *
 * This file is part of the LTI-Computer Vision Library (LTI-Lib)
 *
 * The LTI-Lib is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The LTI-Lib is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the LTI-Lib; see the file LICENSE. If
 * not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */

/*--------------------------------------------------------------------
 * project ....: LTI-Lib: Image Processing and Computer Vision Library
 * file .......: ltiHmmTrainer.h
 * authors ....: Benjamin Winkler
 * organization: LTI, RWTH Aachen
 * creation ...: 7.1.2002
 * revisions ..: $Id: ltiHmmTrainer.h,v 1.9 2006/02/08 12:27:23 ltilib Exp $
 */

#ifndef _LTI_HMM_TRAINER_H_
#define _LTI_HMM_TRAINER_H_

#include <vector>

#include "ltiFunctor.h"
#include "ltiHmmViterbiPathSearch.h"

namespace lti {

  /**
   * Hidden Markov Model (HMM) trainer.
   *
   * This class creates a hiddenMarkovModel from a given training set of
   * feature vector sequences.
   *
   * By default a Bakis topology is chosen (0-1-2 transitions), but this
   * can be changed by redefining minimumJump and maximumJump in the
   * parameters.
   *
   * The Viterbi training algorithm (aka segmental K-means) and a mixture
   * density estimator are used to determine the HMM parameters, starting
   * from the user-defined parameters.
   *
   * See: B.-H. Juang and L. R. Rabiner, "The segmental K-means algorithm
   * for estimating parameters of hidden Markov models", IEEE Transactions
   * on Acoustics, Speech, and Signal Processing, vol. 38, no. 9,
   * pp. 1639-1641, 1990.
   *
   * <b>Note</b>: for computational efficiency, probabilities p are not
   * explicitly computed but represented by <b>scores</b> (~ -ln(p)).
   * Note that a high score corresponds to a low probability, i.e. it
   * actually resembles a cost.
   */
  class hmmTrainer : public functor {
  public:

    /**
     * The parameters for the class hmmTrainer.
     */
    class parameters : public functor::parameters {
    public:
      /**
       * default constructor
       */
      parameters();

      /**
       * copy constructor
       * @param other the parameters object to be copied
       */
      parameters(const parameters& other);

      /**
       * destructor
       */
      ~parameters();

      /**
       * returns name of this type
       */
      const char* getTypeName() const;

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& copy(const parameters& other);

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& operator=(const parameters& other);

      /**
       * returns a pointer to a clone of the parameters
       */
      virtual functor::parameters* clone() const;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        be also written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const;

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        be also read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true);

#     ifdef _LTI_MSC_6
      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug, which exists since MSVC V.4.0, and still by
       * V.6.0 with all bugfixes (so called "service packs") remains
       * there... This method is also public due to another bug, so please
       * NEVER EVER call this method directly: use read() instead
       */
      bool readMS(ioHandler& handler,const bool complete=true);

      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug, which exists since MSVC V.4.0, and still by
       * V.6.0 with all bugfixes (so called "service packs") remains
       * there... This method is also public due to another bug, so please
       * NEVER EVER call this method directly: use write() instead
       */
      bool writeMS(ioHandler& handler,const bool complete=true) const;
#     endif

      // ------------------------------------------------
      // the parameters
      // ------------------------------------------------

      /**
       * Pooling modes.
       *
       * Pooling means estimating a common scaling factor (spread) for
       * several densities instead of one per density.
       */
      enum poolingType {
        noPooling,   /*!< don't pool variances */
        statePooling,/*!< pool variances within one state */
        modelPooling /*!< pool variances of a dimension for all states */
      };

      /**
       * Estimator types for the scaling factor of the densities.
       */
      enum estimatorType {
        estimMaximumLikelihood, /*!< select estimator for given score
                                     function (default) */
        estimStandardDeviation, /*!< standard deviation (gaussian
                                     maximum-likelihood) */
        estimMeanDeviation,     /*!< mean absolute deviation
                                     (laplacian maximum-likelihood) */
        estimMeanDeviationRoot  /*!< root of mean absolute deviation
                                     (laplacian maximum-likelihood) */
      };

      /**
       * special definitions for numberOfStates
       */
      enum {
        minimumSequenceLength = 0, /*!< uses the minimum length of all
                                        sequences as number of states */
        averageSequenceLength = -1 /*!< uses the average length of all
                                        sequences as number of states */
      };

      /** @name Training termination
       *  These members are the training iteration control parameters.
       *  <b>For experienced users!</b>
       */
      //@{

      /**
       * Maximum number of training iterations.
       *
       * Default is 0 (iterate, until convergenceThreshold is reached).
       */
      int maxIterations;

      /**
       * Estimation of the HMM parameters is finished, when score
       * enhancement within one iteration is below or equal to the
       * convergence threshold for the given training data.
       *
       * Default is 0.0 (iterate, while model score is improving)
       */
      double convergenceThreshold;

      //@}

      /** @name HMM attributes
       *  These members are the model attributes.
       */
      //@{

      /**
       * Number of states of the model to be generated.
       *
       * A value of <b>minimumSequenceLength</b> (0) indicates to
       * pick the size of the shortest observation sequence,
       * <b>averageSequenceLength</b> (-1) picks the average size of
       * all considered observation sequences.
       *
       * Default: minimumSequenceLength (0)
       */
      int numberOfStates;

      /**
       * Minimum relative forward jump within the HMM.
       *
       * Default: 0, i.e. loop to current state
       */
      int minimumJump;

      /**
       * Maximum relative forward jump within the HMM.
       *
       * Default: 2
       */
      int maximumJump;

      /**
       * Vector of initial scores.
       * If the size of this vector differs from numberOfStates, the
       * default initialScore is used:
       *
       * <code>
       * <TABLE>
       * <TR> <TD>              </TD><TD>state 0 </TD><TD>other states</TD> </TR>
       * <TR> <TD><b>score</b></TD><TD><b>0</b></TD><TD><b>oo</b>    </TD> </TR>
       * <TR> <TD>(prob)       </TD><TD>(1)     </TD><TD>(0)         </TD> </TR>
       * </TABLE>
       * </code>
       *
       * Remember to specify the initialScore as <b>scores</b>
       * (i.e. ~ -ln(p)). Note that the initialScore vector doesn't have
       * to be normalized.
       */
      dvector initialScore;
      //@}

      /** @name Emission density
       *
       *  Parameters defining the kind of emission density and the
       *  estimator to use.
       */
      //@{

      /**
       * Density function type to be used for the emission scores.
       *
       * Default: laplaceScore
       */
      hiddenMarkovModel::scoreType scoreFunction;

      /**
       * Method for estimating the scaling factor (i.e. spread) of the
       * density functions.
       *
       * Default: estimMaximumLikelihood
       */
      estimatorType estimatorFunction;

      /**
       * Minimum scaling factor. This is required to avoid very
       * narrow and high density functions, which are obtained when
       * variation in the data is too little.
       *
       * Default is 0.1
       */
      double minScalingFactor;

      //@}

      /** @name Splitting (mixture densities)
       *  These members can be adjusted to change the splitting process,
       *  where the density functions are successively divided into smaller
       *  ones to describe the actual distribution better.
       *
       *  <b>For intermediate users!</b>
       */
      //@{

      /**
       * Maximum allowed number of densities per state.
       *
       * Default = 5
       */
      int maxDensities;

      /**
       * Number of iterations between two split processes.
       *
       * A value of 0 will reassign observations until no further
       * changes occur (nope, not yet!).
       *
       * The default value is 5 (split densities at every 5th iteration).
       */
      int reassignmentIterations;

      /**
       * Number of iteration, where splitting is taken into
       * consideration for the first time.
       *
       * By default, this is set to 0.
       */
      int firstSplit;

      /**
       * Pooling means to compute a common scaling factor (spread,
       * variance) for several densities. You can select to have no
       * pooling, state pooling or model pooling.
       *
       * Default: noPooling
       */
      poolingType pooling;

      /**
       * Minimum number of observations assigned to a density to be
       * splittable.
       *
       * Default: 3
       */
      int minFramesForSplit;

      //@}

    };

    /**
     * default constructor
     */
    hmmTrainer();

    /**
     * copy constructor
     * @param other the object to be copied
     */
    hmmTrainer(const hmmTrainer& other);

    /**
     * destructor
     */
    virtual ~hmmTrainer();

    /**
     * returns the name of this type ("hmmTrainer")
     */
    virtual const char* getTypeName() const;

    /**
     * Discard sequences considered so far.
     */
    void reset();

    /**
     * Consider this sequence for computing the hidden markov model.
     * The sequence is added to an internal data store.
     */
    void consider(const sequence<dvector> &theSeq);

    /**
     * Consider multiple sequences for computing the hidden markov model.
     * The sequences are added to an internal data store.
     */
    void consider(const std::vector< sequence<dvector> > &theSequences);

    /**
     * Generate hiddenMarkovModel from the data in the internal data store,
     * that has been filled with the consider() members so far.
     */
    void apply(hiddenMarkovModel &model);

    /**
     * Get the progress of the total score over the training data. The
     * returned vector contains the scores of each iteration as a result
     * of the training (apply()).
     */
    dvector getScoreProgressSequence() {
      return scoreSequence;
    }

    /**
     * copy data of "other" functor.
     * @param other the functor to be copied
     * @return a reference to this functor object
     */
    hmmTrainer& copy(const hmmTrainer& other);

    /**
     * alias for copy member
     * @param other the functor to be copied
     * @return a reference to this functor object
     */
    hmmTrainer& operator=(const hmmTrainer& other);

    /**
     * returns a pointer to a clone of this functor.
     */
    virtual functor* clone() const;

    /**
     * returns used parameters
     */
    const parameters& getParameters() const;

  private:

    /**
     * This private class defines an assignment of frames to a density.
     * Overall scores of the density are also saved here.
     */
    class densityAssignment {
    public:

      /**
       * pointers to the frames (observations) assigned to this density
       * (non-owning; the frames live in theTrainingData)
       */
      std::vector<const dvector *> frames;

      /**
       * calculated density center is written here
       */
      hiddenMarkovModel::singleDensity center;

      /**
       * average score for this density
       */
      double score;
    };

    /**
     * assignment of densities to a state
     */
    typedef std::vector<densityAssignment> stateAssignment;

    /**
     * improve given model with respect to the training sequences
     * (might become public)
     */
    double improveModel(hiddenMarkovModel &model, bool split);

    /**
     * change state number, assign frames to state/density in a linear
     * fashion
     */
    void initializeAssignments();

    /**
     * clears all assignments of densities/frames (does not change the
     * number of states or densities)
     */
    void clearAssignments();

    /**
     * initialize the model
     */
    void initializeModel(hiddenMarkovModel &model);

    /**
     * copy parameters and modify them if necessary
     */
    void copyParameters();

    /**
     * calculate transition matrix, i.e. do smoothing and -log()
     */
    void calculateTransitionMatrix(matrix<double> &transitionScore);

    /**
     * test model on training data, return average score
     */
    double evaluateModel(const hiddenMarkovModel &model);

    /**
     * create model from the current transitions and assignments
     */
    void createModel(hiddenMarkovModel &model);

    /**
     * calculate mean values and variances
     */
    void calculateDensities();

    /**
     * calculate single density
     */
    void calculateThisDensity(densityAssignment &theDensity);

    /**
     * calculate transition scores from the viterbi search,
     * return achieved score
     */
    double considerTrainingSequence(const hiddenMarkovModel &lastModel,
                                    const sequence<dvector> &theSeq);

    /**
     * divide emission scores by number of assigned frames
     * (i.e. number of emissions)
     */
    void normalizeEmissionScores();

    /**
     * check density scores and split if appropriate,
     * return true, if new densities were actually created
     */
    bool splitDensities();

    /**
     * split one density center by disturbance of the center
     */
    void splitThisDensity(densityAssignment &thisDensity,
                          densityAssignment &newDensity);

    /**
     * delete empty densities and states
     */
    void garbageCollection();

    /**
     * delete state with the given number
     */
    void deleteState(const int stateNumber);

    /**
     * delete the given density of the given state
     */
    void deleteDensity(const int stateNumber, const int densityCenter);

    /**
     * viterbi path search
     */
    hmmViterbiPathSearch viterbi;

    /**
     * training data will be saved here
     */
    sequence< sequence<dvector> > theTrainingData;

    /**
     * assignments of all states and densities:
     * assignedFrames[state][density]
     */
    std::vector<stateAssignment> assignedFrames;

    /**
     * score sequence is saved here by apply()
     */
    dvector scoreSequence;

    /**
     * length of the shortest sequence will be saved here
     */
    int minSequenceLength;

    /**
     * total number of frames in all considered sequences is saved
     * here to quickly calculate the average sequence length, if
     * needed.
     */
    int totalFramesInSequences;

    /**
     * temporary copy of parameters
     */
    parameters params;

    /**
     * feature dimension.
     * will be saved here by the first call to consider();
     * sequences will then be checked for consecutive calls to consider().
     */
    int featureDimension;

    /**
     * feature weights
     */
    dvector featureWeights;

    /**
     * transition frequency:
     * for every transition from state i to j, trans[i][j] is increased.
     * calculateTransitionMatrix() creates the score matrix from this
     * matrix.
     */
    matrix<int> transitionFrequency;

    // calculation of mean and scaling factor (i.e. deviation etc.)

    /**
     * calculate mean
     */
    void calculateDensityCenter(const std::vector<const dvector *> &frames,
                                dvector &mean) const;

    /**
     * reset scaling factor calculation
     */
    void resetScaleCalculation();

    /**
     * scaling factor calculation: take vector into consideration
     */
    void considerScale(const dvector &mean, dvector vec);

    /**
     * calculate scaling factor from considered vectors
     */
    void calculateScale(dvector &deviation);

    // scale calculation variables
    dvector scaleVector;
    int scaleConsidered;

  };
}

#endif