latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*---------------------------------------------------------------- 00025 * project ....: LTI Digital Image/Signal Processing Library 00026 * file .......: ltiClassificationStatistics.h 00027 * authors ....: Pablo Alvarado, Peter Doerfler 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 08.10.2000 00030 * revisions ..: $Id: ltiClassificationStatistics.h,v 1.7 2006/02/07 18:12:58 ltilib Exp $ 00031 */ 00032 00033 #ifndef _CLASSIFICATION_STATISTICS_H 00034 #define _CLASSIFICATION_STATISTICS_H 00035 00036 #include "ltiIoObject.h" 00037 #include "ltiDynamicMatrix.h" 00038 #include "ltiClassifier.h" 00039 00040 #include <string> 00041 #include <ctime> 00042 00043 namespace lti { 00044 00045 /** 00046 * This class allows the generation of classification statistics. 00047 * 00048 * This object will generate some confusion matrices and log-files, which 00049 * can help to the selection of classifier-parameters. 00050 * 00051 * The results of the classification will be stored directly into the user 00052 * specified files. 00053 * 00054 * Following example shows how to use this object in the statistics for 00055 * classification results. 00056 * 00057 * \code 00058 * 00059 * lti::rbf theNet; // a rbf-Network 00060 * 00061 * // Training data set: 00062 * // Some points in a two-dimensional feature-space will belong to one 00063 * // of two classes: circle or square 00064 * lti::dmatrix train(10,2); 00065 * double trainData[20] = { 0, 3, // circle 00066 * 1, 3, // circle 00067 * 2, 3, // circle 00068 * 0, 2, // circle 00069 * 1, 2, // circle 00070 * 2, 1, // square 00071 * 3, 1, // square 00072 * 4, 1, // square 00073 * 3, 0, // square 00074 * 4, 0};// square 00075 * train.fill(trainData); 00076 * 00077 * // the ids for the patterns in the training set 00078 * lti::ivector trainIds(10); 00079 * int trainClasses[10] = {314,314,314,314,314,4,4,4,4,4}; 00080 * trainIds.fill(trainClasses); 00081 * 00082 * // just to see what's happening when training... 00083 * lti::streamProgressInfo progBox; 00084 * theNet.setProgressObject(progBox); 00085 * 00086 * // train the classifier 00087 * theNet.train(train,trainIds); 00088 * 00089 * // ------------- STATISTICS -------------- 00090 * 00091 * // prepare the statistics object 00092 * lti::rbf::outputVector nnoutput; // output of the rbf network 00093 * lti::classificationStatistics statistics; 00094 * lti::classificationStatistics::parameters statParam; 00095 * 00096 * // all statistic files should be stored in the "tmp" directory 00097 * statParam.path = "/tmp/"; 00098 * 00099 * // and of course, indicate the statistics-object which parameters 00100 * // should be used: 00101 * statistics.setParameters(statParam); 00102 * 00103 * // assign some human-readable names to the object-Ids 00104 * std::map<int,std::string> names; 00105 * names[314] = "Circle"; 00106 * names[4] = "Square"; 00107 * 00108 * statistics.setNames(names); 00109 * 00110 * // now test some points of the 2D-feature space, and generate 00111 * // some statistics about the classification: 00112 * 00113 * double x,y; // the coordinates of a point in the feature space 00114 * int realID; // for the statistics is required the real id of 00115 * // the given feature, to check if the classification is 00116 * // correct or not! 00117 * 00118 * char buffer[1024]; // a temporary string buffer 00119 * 00120 * lti::dvector feature(2);// a feature-vector 00121 * 00122 * for (y=0;y<=4.0;y+=0.5) { 00123 * for (x=0;x<=4.0;x+=0.5) { 00124 * realID = (y>x) ? 314 : 4; // generate the real id! 00125 * 00126 * feature.at(0)=x; // initialize the feature vector 00127 * feature.at(1)=y; 00128 * 00129 * theNet.classify(feature,nnoutput);// classify the vector! 00130 * 00131 * // generate a feature-name for the feature 00132 * sprintf(buffer,"x: %.2f, y: %.2f",x,y); 00133 * 00134 * // consider the actual feature in the statistics 00135 * statistics.consider(nnoutput,realID,buffer); 00136 * } 00137 * } 00138 * 00139 * statistics.flush(); // save all the statistics on disk! 00140 * 00141 * \endcode 00142 * 00143 * Two files will be created: /tmp/logfile.txt with the list of the 00144 * wrong classifications and /tmp/confusion.mat with the confusion 00145 * matrices. 00146 * 00147 * The confusion matrix file has following information: 00148 * 00149 * - How many classes were considered in the classification. 00150 * - The confusion matrix itself, which show at each row how many 00151 * percent of the presentations of the given object were recognized 00152 * as the given objects. For example, the previous code will produce 00153 * following matrix: 00154 * 00155 * \code 00156 * --- Circle Square correctSingle correctMultip correctReject ... 00157 * Circle 1.000 0.000 1.000000 0.000000 0.000000 ... 00158 * Square 0.222 0.778 0.777778 0.000000 0.000000 ... 00159 * \endcode 00160 * 00161 * which means that the circle were allways correctly recognized, and the 00162 * square was 22% wrong and 78% correctly classified. 00163 * - Total statistics. Here will be listed how many classifications were 00164 * considered in the statistics, the correct and wrong classifications and 00165 * the rejections. 00166 * - The n-Best statistic indicates that the correct object was at the n-th 00167 * position 00168 * - The shared n-Best position indicates how many of the classifications 00169 * were shared between different classes. 00170 * - The mininal number of positions indicates how many positions need to 00171 * be taken into consideration to assure a correct classification. 00172 */ 00173 class classificationStatistics : public ioObject { 00174 public: 00175 00176 // -------------------------------------------------- 00177 // classificationStatistics::parameters 00178 // -------------------------------------------------- 00179 00180 /** 00181 * the parameters for the class classificationStatistics 00182 */ 00183 class parameters : public ioObject { 00184 public: 00185 /** 00186 * default constructor 00187 */ 00188 parameters(); 00189 00190 /** 00191 * copy constructor 00192 * @param other the parameters object to be copied 00193 */ 00194 parameters(const parameters& other); 00195 00196 /** 00197 * destructor 00198 */ 00199 virtual ~parameters(); 00200 00201 /** 00202 * returns name of this type 00203 */ 00204 const char* getTypeName() const; 00205 00206 /** 00207 * copy the contents of a parameters object 00208 * @param other the parameters object to be copied 00209 * @return a reference to this parameters object 00210 */ 00211 parameters& copy(const parameters& other); 00212 00213 /** 00214 * copy data of "other" parameters 00215 */ 00216 parameters& operator=(const parameters& other); 00217 00218 /** 00219 * returns a pointer to a clone of the parameters 00220 */ 00221 virtual parameters* clone() const; 00222 00223 /** 00224 * write the parameters in the given ioHandler 00225 * @param handler the ioHandler to be used 00226 * @param complete if true (the default) the enclosing begin/end will 00227 * be also written, otherwise only the data block will be written. 00228 * @return true if write was successful 00229 */ 00230 virtual bool write(ioHandler& handler,const bool complete=true) const; 00231 00232 /** 00233 * read the parameters from the given ioHandler 00234 * @param handler the ioHandler to be used 00235 * @param complete if true (the default) the enclosing begin/end will 00236 * be also written, otherwise only the data block will be written. 00237 * @return true if write was successful 00238 */ 00239 virtual bool read(ioHandler& handler,const bool complete=true); 00240 00241 # ifdef _LTI_MSC_6 00242 /** 00243 * this function is required by MSVC only, as a workaround for a 00244 * very awful bug, which exists since MSVC V.4.0, and still by 00245 * V.6.0 with all bugfixes (so called "service packs") remains 00246 * there... This method is also public due to another bug, so please 00247 * NEVER EVER call this method directly: use read() instead 00248 */ 00249 bool readMS(ioHandler& handler,const bool complete=true); 00250 00251 /** 00252 * this function is required by MSVC only, as a workaround for a 00253 * very awful bug, which exists since MSVC V.4.0, and still by 00254 * V.6.0 with all bugfixes (so called "service packs") remains 00255 * there... This method is also public due to another bug, so please 00256 * NEVER EVER call this method directly: use write() instead 00257 */ 00258 bool writeMS(ioHandler& handler,const bool complete=true) const; 00259 # endif 00260 00261 // ------------------------------------------------------------- 00262 // the parameters 00263 // ------------------------------------------------------------- 00264 00265 /** 00266 * Use class names in file. 00267 * 00268 * If true, the ids of the objects are substituted by names 00269 * given in the file specified by parameter 'namesFile' 00270 * (Default value: false). 00271 * 00272 * \b Note: this must be set to false if you intend to 00273 * specify the idToNamesMap manually by calling setNames(). 00274 */ 00275 bool useNames; 00276 00277 /** 00278 * Filename with class names. 00279 * 00280 * Name of the file where the names for the objects can 00281 * be found (Default value "objectNames.nms"). 00282 * 00283 * This name is not expected to have a path, since it will always be 00284 * seached in the directory specified in 'path'. Thus, the names file 00285 * will be expected at parameters::path + parameters::namesFile. 00286 * 00287 * These files consist of two columns. The first containing 00288 * the object ids, the second the object names in double quotes and 00289 * separated by whitespaces e.g. 00290 * \code 00291 * 314 "Circle" 00292 * 4 "Square" 00293 * \endcode 00294 * 00295 * \b Note: only considered, if parameters useNames==true. Make 00296 * sure that the parameter path is also specified! 00297 */ 00298 std::string namesFile; 00299 00300 /** 00301 * Names in file first 00302 * 00303 * The order of the columns in the names file can be inverted setting 00304 * this parameter to true. 00305 * 00306 * Set to true if each line contains name of object followed by id. 00307 * 00308 * Set to false if each line contains the id followed by the object name. 00309 * 00310 * Default value: false 00311 */ 00312 bool namesInFileFirst; 00313 00314 /** 00315 * If the confusion matrix is going to be read by humans, it is 00316 * usually better if "-" are used instead of zeros. 00317 * If you need other tools (like Matlab, Excel, etc.) to read the 00318 * confusion matrix files, you need the zeros. 00319 * 00320 * Default value: true 00321 */ 00322 bool suppressZeros; 00323 00324 /** 00325 * if true, the confusion matrix will be saved automatically 00326 * in the file specified by parameter 'confusionMatrix'. Saving 00327 * intervals are defined by parameter 'saveStep' and on 00328 * destruction (Default value: true). 00329 */ 00330 bool writeConfusionMatrix; 00331 00332 /** 00333 * name of the file, where the confusion matrix will be saved 00334 * (Default value "confusion.mat"). Will be appended to 00335 * parameter 'path'. 00336 * 00337 * \b Note: only considered, if parameter writeConfusionMatrix==true. 00338 */ 00339 std::string confusionMatrix; 00340 00341 /** 00342 * specify the intervall of entries for automatic saving of 00343 * the confusion-matrix (Default value: 100, i.e. each 100 entries 00344 * the confusion-mtrix will be saved automatically). Alternatively 00345 * you may call flush() to force write at any moment. 00346 * 00347 * \b Note: only considered, if parameter writeConfusionMatrix==true. 00348 */ 00349 int saveStep; 00350 00351 /** 00352 * use a protocol file, where each entry will be registered. 00353 * (Default value: true) 00354 */ 00355 bool useLogFile; 00356 00357 /** 00358 * name for the protocol file (Default value "logfile.txt"). 00359 * Will be appended to parameter 'path'. 00360 * 00361 * \b Note: only considered, if parameter useLogFile==true. 00362 */ 00363 std::string logFile; 00364 00365 /** 00366 * if true, all patterns will be registered in the log file, if false, 00367 * only the errors will be registered (Default value: false). 00368 * 00369 * \b Note: only considered, if parameter useLogFile==true. 00370 */ 00371 bool logAllPatterns; 00372 00373 /** 00374 * The "path" string will be concatenated before all filenames 00375 * (Default value: "", i.e. working directory). 00376 */ 00377 std::string path; 00378 }; 00379 00380 /** 00381 * types of classification results 00382 */ 00383 enum eStatistics { 00384 CorrectSingle =-1, /*!< classifier chooses one class and it is the 00385 correct one */ 00386 CorrectMultiple=-2, /*!< classifier chooses many classes and one of 00387 them is correct */ 00388 CorrectReject =-3, /*!< classifier rejects one class which is indeed 00389 not the winner */ 00390 WrongSingle =-4, /*!< classifier chooses one class and it is wrong */ 00391 WrongMultiple =-5, /*!< classifier chooses many classes and all of 00392 theme are wrong */ 00393 WrongReject =-6, /*!< classifier rejects a class which would have 00394 been the correct winner */ 00395 Error =-7 /*!< there is an error on the data */ 00396 }; 00397 00398 00399 /** 00400 * default constructor 00401 */ 00402 classificationStatistics(); 00403 00404 /** 00405 * default constructor with parameters 00406 */ 00407 classificationStatistics(const parameters& par); 00408 00409 /** 00410 * Destructor 00411 */ 00412 ~classificationStatistics(); 00413 00414 /** 00415 * set the parameters 00416 */ 00417 bool setParameters(const parameters& params); 00418 00419 /** 00420 * returns a const reference to the parameters 00421 */ 00422 const parameters& getParameters() const; 00423 00424 /** 00425 * include a new classification result into the statistics. 00426 * The patternInfo is additional text that will appear in the 00427 * logFile. Returns the recognition state (see eStatistics). 00428 * 00429 * \b Note: the ID must be positive. negative indices are discarded. 00430 */ 00431 eStatistics consider(const classifier::outputVector& result, 00432 const int& realObjectID, 00433 const std::string& patternInfo = "-?-"); 00434 00435 /** 00436 * reset clears the statistics 00437 */ 00438 void reset(); 00439 00440 /** 00441 * save all statistics stored by now into the file specified in the 00442 * parameters. Note that the parameter writeConfusionMatrix must be 00443 * set to true. 00444 */ 00445 void flush(); 00446 00447 /** 00448 * write all accumulated statistic data to the specified stream. 00449 * this can be used to manually save the statistics. 00450 */ 00451 bool writeData(std::ostream& ostr); 00452 00453 /** 00454 * load the file "filename", which contains the correspondences between 00455 * a class id number and a class name. 00456 * 00457 * \b Note: calling this does NOT set the internal idToNamesMap!!! 00458 * 00459 * The file must consist of two columns. The first containing 00460 * the object ids, the second the object names (optionally in double 00461 * quotes) and separated by whitespaces e.g. 00462 * \code 00463 * 314 "Circle" 00464 * 4 "Square" 00465 * \endcode 00466 * 00467 * @param filename the name of the file to be used 00468 * @param idToNamesMap the map where the correspondences will be written. 00469 * all previous data in the map will be kept. 00470 * @param namesInFileFirst invert the expected order of the data. 00471 */ 00472 static bool loadNames(const std::string& filename, 00473 std::map<int,std::string>& idToNamesMap, 00474 const bool namesInFileFirst=false); 00475 00476 00477 /** 00478 * set the correspondences between a class id number and a class name. 00479 * 00480 * @param idMap the map where the correspondences are given. 00481 */ 00482 bool setNames(const std::map<int,std::string>& idMap); 00483 00484 00485 /** @name Data access members 00486 * Use these to access the internal data members 00487 */ 00488 //@{ 00489 00490 /** returns a const reference to protected member "saved" 00491 */ 00492 const bool& getSaved() const; 00493 00494 /** returns a const reference to the confusion matrix 00495 */ 00496 const dynamicMatrix<double>& getConfusionMatrix() const; 00497 00498 /** returns a const reference to the idToNamesMap 00499 */ 00500 const std::map<int,std::string>& getIdToNamesMap() const; 00501 00502 //@} 00503 00504 00505 /** @name Statistics access members 00506 * Use these to access the accumulated statistics 00507 */ 00508 //@{ 00509 00510 /** returns a const reference to the number of results 00511 */ 00512 const int& getNumberOfResults() const; 00513 00514 /** number of correct singles 00515 */ 00516 const int& getCorrectSingle() const; 00517 00518 /** number of correct multiples 00519 */ 00520 const int& getCorrectMultiple() const; 00521 00522 /** number of correct rejects 00523 */ 00524 const int& getCorrectReject() const; 00525 00526 /** number of wrong singles 00527 */ 00528 const int& getWrongSingle() const; 00529 00530 /** number of wrong multiples 00531 */ 00532 const int& getWrongMultiple() const; 00533 00534 /** number of wrong rejects 00535 */ 00536 const int& getWrongReject() const; 00537 00538 /** nBest recognition results for single correct 00539 */ 00540 const dynamicMatrix<double>& getNBestMatrix() const; 00541 00542 /** nBest recognition results for multiple correct 00543 */ 00544 const dynamicMatrix<double>& getNBestMultipleMatrix() const; 00545 00546 /** minPlaces, i.e. minimum number of elements required in order to 00547 * contain the proper class. 00548 */ 00549 const dynamicMatrix<double>& getMinPlacesMatrix() const; 00550 00551 //@} 00552 00553 00554 protected: 00555 00556 /** 00557 * save all statistic data in the respective files 00558 */ 00559 bool writeData(); 00560 00561 /** 00562 * write confusion matrix to file 00563 */ 00564 bool writeConfusionMatrix(std::ostream& out, 00565 const dynamicMatrix<double>& matrix); 00566 00567 /** 00568 * write nBest-matrix 00569 */ 00570 bool writeNBestMatrix(std::ostream& out, 00571 const dynamicMatrix<double>& matrix, 00572 const std::string& title, 00573 const bool& normalize, 00574 const std::string& labelSingular = "Best", 00575 const std::string& labelPlural = "Best"); 00576 00577 /** 00578 * write classification to file 00579 */ 00580 bool writeLogFileHeader(const std::string& filename); 00581 00582 /** 00583 * write classification to file 00584 */ 00585 bool writeLogFile(const classifier::outputVector& result, 00586 const eStatistics& rState, 00587 const int& realObjectID, 00588 const std::string& patternInfo); 00589 00590 /** 00591 * sort the object names in the matrices 00592 */ 00593 void sortObjNames(const dynamicMatrix<double>& matrix, 00594 std::vector<int>& seqToRealId); 00595 00596 /** 00597 * workaround for std::ios bug in gcc when writing double numbers 00598 */ 00599 void format(std::ostream& out, 00600 const int& width, 00601 const double& val) const; 00602 00603 00604 /** 00605 * workaround for std::ios bug in gcc when writing double numbers 00606 */ 00607 void format(std::ostream& out, 00608 const int& width, 00609 const int& val) const; 00610 00611 /** 00612 * use in the statistics an object with the given name and real id number 00613 */ 00614 bool inscribeObject(const std::string& name, const int& id); 00615 00616 00617 protected: 00618 00619 /** 00620 * store parameters 00621 */ 00622 parameters param; 00623 00624 /** 00625 * if true, all data has been saved. If false, some consider() have been 00626 * called, and there are unsaved information. 00627 */ 00628 bool saved; 00629 00630 /** 00631 * the confusion matrix stores the classification results, each 00632 * row represents one class (row index = realObjectID) and the 00633 * classification results for this class 00634 */ 00635 dynamicMatrix<double> confMatrix; 00636 00637 00638 /** 00639 * map an id number to the name of the object. 00640 */ 00641 std::map<int,std::string> idToNamesMap; 00642 00643 00644 /** @name Statistics 00645 * These are the internal members for storing the actual classification statistics 00646 */ 00647 //@{ 00648 00649 /** number of "consider(...)" called so far. Divide any value, e.g. correctSingle, 00650 * by this to obtain recognition rates. 00651 */ 00652 int numberOfResults; 00653 00654 /** number of correct singles 00655 */ 00656 int correctSingle; 00657 00658 /** number of correct multiples 00659 */ 00660 int correctMultiple; 00661 00662 /** number of correct rejects 00663 */ 00664 int correctReject; 00665 00666 /** number of wrong singles 00667 */ 00668 int wrongSingle; 00669 00670 /** number of wrong multiples 00671 */ 00672 int wrongMultiple; 00673 00674 /** number of wrong rejects 00675 */ 00676 int wrongReject; 00677 00678 // matrices with statistics per object, which tell how many n-best 00679 // classification have been done already 00680 00681 /** nBest recognition results for single correct 00682 */ 00683 dynamicMatrix<double> nBest; 00684 00685 /** nBest recognition results for multiple correct 00686 */ 00687 dynamicMatrix<double> nBestMultiple; 00688 00689 /** minPlaces, i.e. minimum number of elements required in order to 00690 * contain the proper class. 00691 */ 00692 dynamicMatrix<double> minPlaces; 00693 00694 //@} 00695 00696 /** 00697 * formatting widths for the columns of the log file 00698 */ 00699 static const int logWidths[]; 00700 00701 /** 00702 * formatting widths for the columns of the confusion matrix file 00703 */ 00704 static const int confWidths[]; 00705 00706 }; 00707 00708 00709 }; 00710 00711 #endif