LTI-Lib latest version v1.9 - last update 10 Apr 2010

ltiClassificationStatistics.h

00001 /*
00002  * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*----------------------------------------------------------------
00025  * project ....: LTI Digital Image/Signal Processing Library
00026  * file .......: ltiClassificationStatistics.h
00027  * authors ....: Pablo Alvarado, Peter Doerfler
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 08.10.2000
00030  * revisions ..: $Id: ltiClassificationStatistics.h,v 1.7 2006/02/07 18:12:58 ltilib Exp $
00031  */
00032 
00033 #ifndef _CLASSIFICATION_STATISTICS_H
00034 #define _CLASSIFICATION_STATISTICS_H
00035 
00036 #include "ltiIoObject.h"
00037 #include "ltiDynamicMatrix.h"
00038 #include "ltiClassifier.h"
00039 
00040 #include <string>
00041 #include <ctime>
00042 
00043 namespace lti {
00044 
00045   /**
00046    * This class allows the generation of classification statistics.
00047    *
00048    * This object will generate some confusion matrices and log-files, which
00049    * can help in the selection of classifier parameters.
00050    *
00051    * The results of the classification will be stored directly into the user
00052    * specified files.
00053    *
00054    * The following example shows how to use this object to generate
00055    * statistics of classification results.
00056    *
00057    * \code
00058    *
00059    * lti::rbf theNet;     // a rbf-Network
00060    *
00061    * // Training data set:
00062    * // Some points in a two-dimensional feature-space will belong to one
00063    * // of two classes: circle or square
00064    * lti::dmatrix train(10,2);
00065    * double trainData[20] = { 0, 3, // circle
00066    *                          1, 3, // circle
00067    *                          2, 3, // circle
00068    *                          0, 2, // circle
00069    *                          1, 2, // circle
00070    *                          2, 1, // square
00071    *                          3, 1, // square
00072    *                          4, 1, // square
00073    *                          3, 0, // square
00074    *                          4, 0};// square
00075    * train.fill(trainData);
00076    *
00077    * // the ids for the patterns in the training set
00078    * lti::ivector trainIds(10);
00079    * int trainClasses[10] = {314,314,314,314,314,4,4,4,4,4};
00080    * trainIds.fill(trainClasses);
00081    * 
00082    * // just to see what's happening when training...
00083    * lti::streamProgressInfo progBox;
00084    * theNet.setProgressObject(progBox);
00085    * 
00086    * // train the classifier
00087    * theNet.train(train,trainIds);
00088    *    
00089    * // ------------- STATISTICS --------------
00090    * 
00091    * // prepare the statistics object
00092    * lti::rbf::outputVector nnoutput; // output of the rbf network
00093    * lti::classificationStatistics statistics;
00094    * lti::classificationStatistics::parameters statParam;
00095    * 
00096    * // all statistic files should be stored in the "tmp" directory
00097    * statParam.path = "/tmp/";
00098    * 
00099    * // and of course, indicate the statistics-object which parameters
00100    * // should be used:
00101    * statistics.setParameters(statParam);
00102    *
00103    * // assign some human-readable names to the object-Ids
00104    * std::map<int,std::string> names;
00105    * names[314] = "Circle";
00106    * names[4]   = "Square";
00107    * 
00108    * statistics.setNames(names);
00109    * 
00110    * // now test some points of the 2D-feature space, and generate
00111    * // some statistics about the classification:
00112    * 
00113    * double x,y; // the coordinates of a point in the feature space
00114    * int realID; // for the statistics is required the real id of
00115    * // the given feature, to check if the classification is
00116    * // correct or not!
00117    * 
00118    * char buffer[1024]; // a temporary string buffer
00119    * 
00120    * lti::dvector feature(2);// a feature-vector
00121    *
00122    * for (y=0;y<=4.0;y+=0.5) {
00123    *   for (x=0;x<=4.0;x+=0.5) {
00124    *     realID = (y>x) ? 314 : 4; // generate the real id!
00125    *  
00126    *     feature.at(0)=x;          // initialize the feature vector
00127    *     feature.at(1)=y;
00128    *
00129    *     theNet.classify(feature,nnoutput);// classify the vector!
00130    *
00131    *     // generate a feature-name for the feature
00132    *     sprintf(buffer,"x: %.2f, y: %.2f",x,y);
00133    *
00134    *     // consider the actual feature in the statistics
00135    *     statistics.consider(nnoutput,realID,buffer);
00136    *   }
00137    * }
00138    *
00139    * statistics.flush(); // save all the statistics on disk!
00140    *
00141    * \endcode
00142    *
00143    * Two files will be created: /tmp/logfile.txt with the list of the
00144    * wrong classifications and /tmp/confusion.mat with the confusion
00145    * matrices.
00146    *
00147    * The confusion matrix file has the following information:
00148    *
00149    * - How many classes were considered in the classification.
00150    * - The confusion matrix itself, which shows in each row which
00151    *   fraction of the presentations of the given object was recognized
00152    *   as each of the objects.  For example, the previous code will produce
00153    *   the following matrix:
00154    *
00155    * \code
00156    * ---     Circle  Square correctSingle correctMultip correctReject ...
00157    * Circle   1.000   0.000      1.000000      0.000000      0.000000 ...
00158    * Square   0.222   0.778      0.777778      0.000000      0.000000 ...
00159    * \endcode
00160    *
00161    * which means that the circle was always correctly recognized, and the
00162    * square was classified wrongly in 22% and correctly in 78% of the cases.
00163    * - Total statistics.  Here will be listed how many classifications were
00164    *   considered in the statistics, the correct and wrong classifications and
00165    *   the rejections.
00166    * - The n-Best statistic indicates that the correct object was at the n-th
00167    *   position
00168    * - The shared n-Best position indicates how many of the classifications
00169    *   were shared between different classes.
00170    * - The minimal number of positions indicates how many positions need to
00171    *   be taken into consideration to assure a correct classification.
00172    */
00173   class classificationStatistics : public ioObject {
00174   public:
00175 
00176     // --------------------------------------------------
00177     // classificationStatistics::parameters
00178     // --------------------------------------------------
00179 
00180     /**
00181      * the parameters for the class classificationStatistics
00182      */
00183     class parameters : public ioObject {
00184     public:
00185       /**
00186        * default constructor
00187        */
00188       parameters();
00189 
00190       /**
00191        * copy constructor
00192        * @param other the parameters object to be copied
00193        */
00194       parameters(const parameters& other);
00195 
00196       /**
00197        * destructor
00198        */
00199       virtual ~parameters();
00200 
00201       /**
00202        * returns name of this type
00203        */
00204       const char* getTypeName() const;
00205 
00206       /**
00207        * copy the contents of a parameters object
00208        * @param other the parameters object to be copied
00209        * @return a reference to this parameters object
00210        */
00211       parameters& copy(const parameters& other);
00212 
00213       /**
00214        * copy data of "other" parameters
00215        */
00216       parameters& operator=(const parameters& other);
00217 
00218       /**
00219        * returns a pointer to a clone of the parameters
00220        */
00221       virtual parameters* clone() const;
00222 
00223       /**
00224        * write the parameters in the given ioHandler
00225        * @param handler the ioHandler to be used
00226        * @param complete if true (the default) the enclosing begin/end will
00227        *        be also written, otherwise only the data block will be written.
00228        * @return true if write was successful
00229        */
00230       virtual bool write(ioHandler& handler,const bool complete=true) const;
00231 
00232       /**
00233        * read the parameters from the given ioHandler
00234        * @param handler the ioHandler to be used
00235        * @param complete if true (the default) the enclosing begin/end will
00236        *        be also read, otherwise only the data block will be read.
00237        * @return true if read was successful
00238        */
00239       virtual bool read(ioHandler& handler,const bool complete=true);
00240 
00241 #     ifdef _LTI_MSC_6
00242       /**
00243        * this function is required by MSVC only, as a workaround for a
00244        * very awful bug, which exists since MSVC V.4.0, and still by
00245        * V.6.0 with all bugfixes (so called "service packs") remains
00246        * there...  This method is also public due to another bug, so please
00247        * NEVER EVER call this method directly: use read() instead
00248        */
00249       bool readMS(ioHandler& handler,const bool complete=true);
00250 
00251       /**
00252        * this function is required by MSVC only, as a workaround for a
00253        * very awful bug, which exists since MSVC V.4.0, and still by
00254        * V.6.0 with all bugfixes (so called "service packs") remains
00255        * there...  This method is also public due to another bug, so please
00256        * NEVER EVER call this method directly: use write() instead
00257        */
00258       bool writeMS(ioHandler& handler,const bool complete=true) const;
00259 #     endif
00260 
00261       // -------------------------------------------------------------
00262       // the parameters
00263       // -------------------------------------------------------------
00264 
00265       /**
00266        * Use class names in file.
00267        *
00268        * If true, the ids of the objects are substituted by names
00269        * given in the file specified by parameter 'namesFile'
00270        * (Default value: false).
00271        *
00272        * \b Note: this must be set to false if you intend to
00273        * specify the idToNamesMap manually by calling setNames().
00274        */
00275       bool useNames;
00276 
00277       /**
00278        * Filename with class names.
00279        *
00280        * Name of the file where the names for the objects can
00281        * be found (Default value "objectNames.nms").
00282        *
00283        * This name is not expected to have a path, since it will always be
00284        * searched in the directory specified in 'path'.  Thus, the names file
00285        * will be expected at parameters::path + parameters::namesFile.
00286        *
00287        * These files consist of two columns. The first containing
00288        * the object ids, the second the object names in double quotes and
00289        * separated by whitespaces e.g.
00290        * \code
00291        * 314 "Circle"
00292        * 4   "Square"
00293        * \endcode
00294        *
00295        * \b Note: only considered, if parameters useNames==true. Make
00296        * sure that the parameter path is also specified!
00297        */
00298       std::string namesFile;
00299 
00300       /**
00301        * Names in file first
00302        *
00303        * The order of the columns in the names file can be inverted setting
00304        * this parameter to true.
00305        *
00306        * Set to true if each line contains name of object followed by id.
00307        *
00308        * Set to false if each line contains the id followed by the object name.
00309        *
00310        * Default value: false
00311        */
00312       bool namesInFileFirst;
00313 
00314       /**
00315        * If the confusion matrix is going to be read by humans, it is
00316        * usually better if "-" are used instead of zeros.
00317        * If you need other tools (like Matlab, Excel, etc.) to read the
00318        * confusion matrix files, you need the zeros.
00319        *
00320        * Default value: true
00321        */
00322       bool suppressZeros;
00323 
00324       /**
00325        * if true, the confusion matrix will be saved automatically
00326        * in the file specified by parameter 'confusionMatrix'. Saving
00327        * intervals are defined by parameter 'saveStep' and on
00328        * destruction (Default value: true).
00329        */
00330       bool writeConfusionMatrix;
00331 
00332       /**
00333        * name of the file, where the confusion matrix will be saved
00334        * (Default value "confusion.mat"). Will be appended to
00335        * parameter 'path'.
00336        *
00337        * \b Note: only considered, if parameter writeConfusionMatrix==true.
00338        */
00339       std::string confusionMatrix;
00340 
00341       /**
00342        * specify the interval of entries for automatic saving of
00343        * the confusion-matrix (Default value: 100, i.e. each 100 entries
00344        * the confusion-matrix will be saved automatically). Alternatively
00345        * you may call flush() to force write at any moment.
00346        *
00347        * \b Note: only considered, if parameter writeConfusionMatrix==true.
00348        */
00349       int saveStep;
00350 
00351       /**
00352        * use a protocol file, where each entry will be registered.
00353        * (Default value: true)
00354        */
00355       bool useLogFile;
00356 
00357       /**
00358        * name for the protocol file (Default value "logfile.txt").
00359        * Will be appended to parameter 'path'.
00360        *
00361        * \b Note: only considered, if parameter useLogFile==true.
00362        */
00363       std::string logFile;
00364 
00365       /**
00366        * if true, all patterns will be registered in the log file, if false,
00367        * only the errors will be registered (Default value: false).
00368        *
00369        * \b Note: only considered, if parameter useLogFile==true.
00370        */
00371       bool logAllPatterns;
00372 
00373       /**
00374        * The "path" string will be concatenated before all filenames
00375        * (Default value: "", i.e. working directory).
00376        */
00377       std::string path;
00378     };
00379 
00380     /**
00381      * types of classification results
00382      */
00383     enum eStatistics {
00384       CorrectSingle  =-1, /*!< classifier chooses one class and it is the
00385                                correct one */
00386       CorrectMultiple=-2, /*!< classifier chooses many classes and one of
00387                                them is correct */
00388       CorrectReject  =-3, /*!< classifier rejects one class which is indeed
00389                                not the winner */
00390       WrongSingle    =-4, /*!< classifier chooses one class and it is wrong */
00391       WrongMultiple  =-5, /*!< classifier chooses many classes and all of
00392                                them are wrong */
00393       WrongReject    =-6, /*!< classifier rejects a class which would have
00394                                been the correct winner */
00395       Error          =-7  /*!< there is an error on the data */
00396     };
00397 
00398 
00399     /**
00400      * default constructor
00401      */
00402     classificationStatistics();
00403 
00404     /**
00405      * constructor with parameters
00406      */
00407     classificationStatistics(const parameters& par);
00408 
00409     /**
00410      * Destructor
00411      */
00412     ~classificationStatistics();
00413 
00414     /**
00415      * set the parameters
00416      */
00417     bool setParameters(const parameters& params);
00418 
00419     /**
00420      * returns a const reference to the parameters
00421      */
00422     const parameters& getParameters() const;
00423 
00424     /**
00425      * include a new classification result into the statistics.
00426      * The patternInfo is additional text that will appear in the
00427      * logFile. Returns the recognition state (see eStatistics).
00428      *
00429      * \b Note: the ID must be positive. Negative indices are discarded.
00430      */
00431     eStatistics consider(const classifier::outputVector& result,
00432                          const int& realObjectID,
00433                          const std::string& patternInfo = "-?-");
00434 
00435     /**
00436      * reset clears the statistics
00437      */
00438     void reset();
00439 
00440     /**
00441      * save all statistics stored by now into the file specified in the
00442      * parameters. Note that the parameter writeConfusionMatrix must be
00443      * set to true.
00444      */
00445     void flush();
00446 
00447     /**
00448      * write all accumulated statistic data to the specified stream.
00449      * this can be used to manually save the statistics.
00450      */
00451     bool writeData(std::ostream& ostr);
00452 
00453     /**
00454      * load the file "filename", which contains the correspondences between
00455      * a class id number and a class name.
00456      *
00457      * \b Note: calling this does NOT set the internal idToNamesMap!!!
00458      *
00459      * The file must consist of two columns. The first containing
00460      * the object ids, the second the object names (optionally in double
00461      * quotes) and separated by whitespaces e.g.
00462      * \code
00463      * 314 "Circle"
00464      * 4   "Square"
00465      * \endcode
00466      *
00467      * @param filename the name of the file to be used
00468      * @param idToNamesMap the map where the correspondences will be written.
00469      *                     all previous data in the map will be kept.
00470      * @param namesInFileFirst invert the expected order of the data.
00471      */
00472     static bool loadNames(const std::string& filename,
00473                           std::map<int,std::string>& idToNamesMap,
00474                           const bool namesInFileFirst=false);
00475 
00476 
00477     /**
00478      * set the correspondences between a class id number and a class name.
00479      *
00480      * @param idMap the map where the correspondences are given.
00481      */
00482     bool setNames(const std::map<int,std::string>& idMap);
00483 
00484 
00485     /** @name Data access members
00486      *        Use these to access the internal data members
00487      */
00488     //@{
00489 
00490       /** returns a const reference to protected member "saved"
00491        */
00492       const bool& getSaved() const;
00493 
00494       /** returns a const reference to the confusion matrix
00495        */
00496       const dynamicMatrix<double>& getConfusionMatrix() const;
00497 
00498       /** returns a const reference to the idToNamesMap
00499        */
00500       const std::map<int,std::string>& getIdToNamesMap() const;
00501 
00502     //@}
00503 
00504 
00505     /** @name Statistics access members
00506      *        Use these to access the accumulated statistics
00507      */
00508     //@{
00509 
00510       /** returns a const reference to the number of results
00511        */
00512       const int& getNumberOfResults() const;
00513 
00514       /** number of correct singles
00515        */
00516       const int& getCorrectSingle() const;
00517 
00518       /** number of correct multiples
00519        */
00520       const int& getCorrectMultiple() const;
00521 
00522       /** number of correct rejects
00523        */
00524       const int& getCorrectReject() const;
00525 
00526       /** number of wrong singles
00527        */
00528       const int& getWrongSingle() const;
00529 
00530       /** number of wrong multiples
00531        */
00532       const int& getWrongMultiple() const;
00533 
00534       /** number of wrong rejects
00535        */
00536       const int& getWrongReject() const;
00537 
00538       /** nBest recognition results for single correct
00539       */
00540       const dynamicMatrix<double>& getNBestMatrix() const;
00541 
00542       /** nBest recognition results for multiple correct
00543       */
00544       const dynamicMatrix<double>& getNBestMultipleMatrix() const;
00545 
00546       /** minPlaces, i.e. minimum number of elements required in order to
00547       *  contain the proper class.
00548       */
00549       const dynamicMatrix<double>& getMinPlacesMatrix() const;
00550 
00551     //@}
00552 
00553 
00554   protected:
00555 
00556     /**
00557      * save all statistic data in the respective files
00558      */
00559     bool writeData();
00560 
00561     /**
00562      * write the given confusion matrix to the given output stream
00563      */
00564     bool writeConfusionMatrix(std::ostream& out,
00565                               const dynamicMatrix<double>& matrix);
00566 
00567     /**
00568      * write the given nBest-matrix to the given output stream
00569      */
00570     bool writeNBestMatrix(std::ostream& out,
00571                           const dynamicMatrix<double>& matrix,
00572                           const std::string& title,
00573                           const bool& normalize,
00574                           const std::string& labelSingular = "Best",
00575                           const std::string& labelPlural = "Best");
00576 
00577     /**
00578      * write the header of the log file
00579      */
00580     bool writeLogFileHeader(const std::string& filename);
00581 
00582     /**
00583      * write classification to file
00584      */
00585     bool writeLogFile(const classifier::outputVector& result,
00586                       const eStatistics& rState,
00587                       const int& realObjectID,
00588                       const std::string& patternInfo);
00589 
00590     /**
00591      * sort the object names in the matrices
00592      */
00593     void sortObjNames(const dynamicMatrix<double>& matrix,
00594                       std::vector<int>& seqToRealId);
00595 
00596     /**
00597      * workaround for std::ios bug in gcc when writing double numbers
00598      */
00599     void format(std::ostream& out,
00600                 const int& width,
00601                 const double& val) const;
00602 
00603 
00604     /**
00605      * overload of the std::ios formatting workaround for int values
00606      */
00607     void format(std::ostream& out,
00608                 const int& width,
00609                 const int& val) const;
00610 
00611     /**
00612      * use in the statistics an object with the given name and real id number
00613      */
00614     bool inscribeObject(const std::string& name, const int& id);
00615 
00616 
00617   protected:
00618 
00619     /**
00620      * store parameters
00621      */
00622     parameters param;
00623 
00624     /**
00625      * if true, all data has been saved.  If false, some consider() have been
00626      * called, and there is unsaved information.
00627      */
00628     bool saved;
00629 
00630     /**
00631      * the confusion matrix stores the classification results, each
00632      * row represents one class (row index = realObjectID) and the
00633      * classification results for this class
00634      */
00635     dynamicMatrix<double> confMatrix;
00636 
00637 
00638     /**
00639      * map an id number to the name of the object.
00640      */
00641     std::map<int,std::string> idToNamesMap;
00642 
00643 
00644     /** @name Statistics
00645      *        These are the internal members for storing the actual classification statistics
00646      */
00647     //@{
00648 
00649       /** number of "consider(...)" called so far. Divide any value, e.g. correctSingle,
00650        *  by this to obtain recognition rates.
00651        */
00652       int numberOfResults;
00653 
00654       /** number of correct singles
00655        */
00656       int correctSingle;
00657 
00658       /** number of correct multiples
00659        */
00660       int correctMultiple;
00661 
00662       /** number of correct rejects
00663        */
00664       int correctReject;
00665 
00666       /** number of wrong singles
00667        */
00668       int wrongSingle;
00669 
00670       /** number of wrong multiples
00671        */
00672       int wrongMultiple;
00673 
00674       /** number of wrong rejects
00675        */
00676       int wrongReject;
00677 
00678       // matrices with statistics per object, which tell how many n-best
00679       // classification have been done already
00680 
00681       /** nBest recognition results for single correct
00682        */
00683       dynamicMatrix<double> nBest;
00684 
00685       /** nBest recognition results for multiple correct
00686        */
00687       dynamicMatrix<double> nBestMultiple;
00688 
00689       /** minPlaces, i.e. minimum number of elements required in order to
00690        *  contain the proper class.
00691        */
00692       dynamicMatrix<double> minPlaces;
00693 
00694     //@}
00695 
00696     /**
00697      * formatting widths for the columns of the log file
00698      */
00699     static const int logWidths[];
00700 
00701     /**
00702      * formatting widths for the columns of the confusion matrix file
00703      */
00704     static const int confWidths[];
00705 
00706   };
00707 
00708 
00709 };
00710 
00711 #endif

Generated on Sat Apr 10 15:25:13 2010 for LTI-Lib by Doxygen 1.6.1