ltiFeatureSaliencyIK.h

00001 /*
00002  * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*----------------------------------------------------------------
00025  * project ....: LTI Digital Image/Signal Processing Library
00026  * file .......: ltiFeatureSaliencyIK.h
00027  * authors ....: Pablo Alvarado
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 30.10.2000
00030  * revisions ..: $Id: ltiFeatureSaliencyIK.h,v 1.8 2006/02/08 11:08:21 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_FEATURE_SALIENCY_IK_H_
00034 #define _LTI_FEATURE_SALIENCY_IK_H_
00035 
00036 #include "ltiObject.h"
00037 #include "ltiImage.h"
00038 #include "ltiSaliency.h"
00039 #include "ltiGaussianPyramid.h"
00040 
00041 namespace lti {
00042   /**
00043    * Feature Saliency for color images.
00044    *
00045    * This class allows the generation of a Saliency Map from a color
00046    * image based on the description in L. Itti et al., "A Model of Saliency-
00047    * Based Visual Attention for Rapid Scene Analysis", IEEE Trans. on
00048    * Pattern Analysis and Machine Intelligence, Vol. 20, No. 11,
00049    * November 1998
00050    */
00051   class featureSaliencyIK : public saliency {
00052   public:
00053     /**
00054      * the parameters for the class featureSaliencyIK
00055      */
00056     class parameters : public saliency::parameters {
00057     public:
00058       /**
00059        * default constructor
00060        */
00061       parameters();
00062 
00063       /**
00064        * copy constructor
00065        * @param other the parameters object to be copied
00066        */
00067       parameters(const parameters& other);
00068 
00069       /**
00070        * destructor
00071        */
00072       ~parameters();
00073 
00074       /**
00075        * returns the name of this type
00076        */
00077       const char* getTypeName() const;
00078 
00079       /**
00080        * copy the contents of a parameters object
00081        * @param other the parameters object to be copied
00082        * @return a reference to this parameters object
00083        */
00084       parameters& copy(const parameters& other);
00085 
00086       /**
00087        * returns a pointer to a clone of the parameters
00088        */
00089       virtual functor::parameters* clone() const;
00090 
00091       /**
00092        * write the parameters in the given ioHandler
00093        * @param handler the ioHandler to be used
00094        * @param complete if true (the default) the enclosing begin/end will
00095        *        be also written, otherwise only the data block will be written.
00096        * @return true if write was successful
00097        */
00098       virtual bool write(ioHandler& handler,const bool complete=true) const;
00099 
00100       /**
00101        * read the parameters from the given ioHandler
00102        * @param handler the ioHandler to be used
00103        * @param complete if true (the default) the enclosing begin/end will
00104        *        be also read, otherwise only the data block will be read.
00105        * @return true if read was successful
00106        */
00107       virtual bool read(ioHandler& handler,const bool complete=true);
00108 
00109 #     ifdef _LTI_MSC_6
00110       /**
00111        * this function is required by MSVC only, as a workaround for a
00112        * very awful bug, which exists since MSVC V.4.0, and still by
00113        * V.6.0 with all bugfixes (so called "service packs") remains
00114        * there...  This method is public due to another bug, so please
00115        * NEVER EVER call this method directly: use read() instead!
00116        */
00117       bool readMS(ioHandler& handler,const bool complete=true);
00118 
00119       /**
00120        * this function is required by MSVC only, as a workaround for a
00121        * very awful bug, which exists since MSVC V.4.0, and still by
00122        * V.6.0 with all bugfixes (so called "service packs") remains
00123        * there...  This method is public due to another bug, so please
00124        * NEVER EVER call this method directly: use write() instead!
00125        */
00126       bool writeMS(ioHandler& handler,const bool complete=true) const;
00127 #     endif
00128 
00129       // --------------------------------------------
00130       // the parameters
00131       // --------------------------------------------
00132 
00133       /**
00134        * Number of scales to be used in the multiresolutional analysis.
00135        * (Default value: 8)
00136        */
00137       int scales;
00138 
00139       /**
00140        * the saliency map will be generated with the given scale resolution.
00141        * The value must be between 0 and scales-1 (default 0)
00142        */
00143       int mapScale;
00144 
00145       /**
00146        * factor of the maximum intensity, which determines the threshold below
00147        * which all color information will be neglected (default 0.05 for 5%)
00148        */
00149       double colorThreshold;
00150 
00151       /**
00152        * minimum scale index to be checked in the conspicuity maps (default 1)
00153        */
00154       int minScale;
00155 
00156       /**
00157        * maximum scale index to be checked in the conspicuity maps (default 3)
00158        */
00159       int maxScale;
00160 
00161       /**
00162        * minimum delta for the inter-scale comparison (default 3)
00163        */
00164       int minDeltaScale;
00165 
00166       /**
00167        * maximum delta for the inter-scale comparison (default 4)
00168        * Please note that maxDeltaScale + maxScale must be less than scales
00169        */
00170       int maxDeltaScale;
00171 
00172       /**
00173        * gabor kernel size (default 9 (means 9x9 kernel))
00174        */
00175       int gaborKernelSize;
00176 
00177       /**
00178        * number of iterations to select the saliency regions (default 3).
00179        * (this allows an approximation of the suggestion in Itti and Koch
00180        *  "A saliency-based search mechanism for overt and covert shifts of
00181        *   visual attention" Elsevier 2000 (www.elsevier.com/locate/visres))
00182        */
00183       int smoothingIterations;
00184     };
00185 
00186     /**
00187      * default constructor
00188      */
00189     featureSaliencyIK();
00190 
00191     /**
00192      * copy constructor
00193      * @param other the object to be copied
00194      */
00195     featureSaliencyIK(const featureSaliencyIK& other);
00196 
00197     /**
00198      * destructor
00199      */
00200     virtual ~featureSaliencyIK();
00201 
00202     /**
00203      * returns the name of this type ("featureSaliencyIK")
00204      */
00205     virtual const char* getTypeName() const;
00206 
00207     /**
00208      * extract the saliency map of the image and leave it on dest
00209      * @param src image with the source data.
00210      * @param dest channel where the saliency map will be left.
00211      * @return true if successful, false otherwise.
00212      */
00213     bool apply(const image& src,channel& dest) const;
00214 
00215     /**
00216      * extract the conspicuity maps of intensity, color and orientation from
00217      * the given color image
00218      * @param src image with the source data.
00219      * @param conspIntensity conspicuity map for intensity
00220      * @param conspColor conspicuity map for color
00221      * @param conspOrientation conspicuity map for orientation
00222      * @return true if successful, false otherwise.
00223      */
00224     bool apply(const image& src,
00225                      channel& conspIntensity,
00226                      channel& conspColor,
00227                      channel& conspOrientation) const;
00228 
00229     /**
00230      * copy data of "other" functor.
00231      * @param other the functor to be copied
00232      * @return a reference to this functor object
00233      */
00234     featureSaliencyIK& copy(const featureSaliencyIK& other);
00235 
00236     /**
00237      * returns a pointer to a clone of this functor.
00238      */
00239     virtual functor* clone() const;
00240 
00241     /**
00242      * returns used parameters
00243      */
00244     const parameters& getParameters() const;
00245 
00246   protected:
00247     /**
00248      * calculate the color channels required by the Itti & Koch model:
00249      *
00250      * With
00251      * r', g' and b' the original color channels
00252      * I = (r'+g'+b')/3
00253      * r = r'/I
00254      * g = g'/I
00255      * b = b'/I
00256      *
00257      * the red, green, blue and yellow broadly tuned color channels
00258      * are defined as:
00259      *
00260      * R = r-(g+b)/2;         // negative values are set to zero!
00261      * G = g-(r+b)/2;         // negative values are set to zero!
00262      * B = b-(r+g)/2;         // negative values are set to zero!
00263      * Y = (r+g)/2-|r-g|/2-b; // negative values are set to zero!
00264      * NOTE(review): outputs are RG, BY and I; presumably RG=R-G, BY=B-Y (TODO confirm)
00265      */
00266     void getColorChannels(const image& img,
00267                           channel& RG,
00268                           channel& BY,
00269                           channel& I) const;
00270 
00271     /**
00272      * returns the number of pyramids required to represent all reconstructed
00273      * scales needed by the conspicuity maps
00274      */
00275     int getNumberPyramids() const;
00276 
00277     /**
00278      * return the index of the pyramid that contains the scaling of s to c
00279      */
00280     int getIndex(const int& c,const int& s) const;
00281 
00282     /**
00283      * return the used scales of the pyramid with index idx
00284      */
00285     void getLimits(const int& idx, int& from, int& to) const;
00286 
00287     /**
00288      * generate the upsampling pyramids required for the given pyramid
00289      */
00290     void getUpscaledPyramids(const gaussianPyramid<channel>& src,
00291                            std::vector<gaussianPyramid<channel> >& usp) const;
00292 
00293 
00294     /**
00295      * modified subtraction (-) of two inter-resolutional channels...
00296      * This does an element-wise subtraction and absolute value:
00297      * result = | a - b |
00298      * The member assumes "connected" channels (not lined!)
00299      */
00300     void subtAndNorm(const channel& a,const channel& b,channel& result) const;
00301 
00302     /**
00303      * modified addition (+) of two inter-resolutional channels...
00304      * This does an element-wise addition and absolute value:
00305      * result = | a + b |
00306      * The member assumes "connected" channels (not lined!)
00307      */
00308     void addAndNorm(const channel& a,const channel& b,channel& result) const;
00309 
00310     /**
00311      * accumulate the normed channel c in the accumulator-channel acc
00312      */
00313     void normAndAcc(const channel& c, channel& acc) const;
00314 
00315     /**
00316      * normalize the given channel; the result is left in chnl
00317      */
00318     void normalize(channel& chnl) const;
00319 
00320 
00321     /**
00322      * contrast channel is defined as I(c,s)=|I(c) (-) I(s)|
00323      */
00324     void conspicuityI(const gaussianPyramid<channel>& pyr,channel& mapI) const;
00325 
00326     /**
00327      * opponent colors conspicuity C(c,s)=N(RG(c,s))+N(BY(c,s))
00328      * the given pyramids must contain the subtraction of two color channels,
00329      * i.e. R-G or B-Y
00330      */
00331     void conspicuityC(const gaussianPyramid<channel>& pRG,
00332                       const gaussianPyramid<channel>& pBY,
00333                       channel& mapI) const;
00334 
00335 
00336     /**
00337      * orientation conspicuity.  The gaussian pyramids must contain
00338      * gabor pyramids for the angles 0, 45, 90 and 135.  (Note that gabor
00339      * pyramids are a sort of gaussian pyramids for the upsampling reconstruction)
00340      */
00341     void conspicuityO(const gaussianPyramid<channel>& p00,
00342                       const gaussianPyramid<channel>& p45,
00343                       const gaussianPyramid<channel>& p90,
00344                       const gaussianPyramid<channel>& p135,
00345                       channel& mapI) const;
00346 
00347 
00348     /**
00349      * add the scales in the given pyramid at the given scale, and leave
00350      * the result in the given channel.  The size of the channel must be
00351      * properly set before calling this function!
00352      */
00353     void add(const gaussianPyramid<channel>& pyr,
00354              const int& pyrIndex,
00355              const int& scale,
00356              channel& result) const;
00357 
00358     /**
00359      * iterative normalization
00360      */
00361     void itNorm(const int& steps,channel& chnl) const;
00362 
00363   };
00364 
00365 }
00366 
00367 #endif