This repository has been archived on 2020-04-08. You can view files and clone it, but cannot push or open issues or pull requests.
Files
Indoor/math/divergence/KullbackLeibler.h
2017-04-19 14:59:04 +02:00

169 lines
7.2 KiB
C++

#ifndef KULLBACKLEIBLER_H
#define KULLBACKLEIBLER_H
#include "../distribution/Normal.h"
#include "../distribution/NormalN.h"
#include "../../Assertions.h"
#include <string>
namespace Divergence {
enum LOGMODE{
BASE2,
BASE10,
NATURALIS
};
template <typename Scalar> class KullbackLeibler {
public:
/** Calculate the Kullback Leibler Distance from a set of sample densities
* Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
* @param P is the vector containing the densities of a set of samples
* @param Q is a vector containg the densities of the same samples set then P
*/
static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
// Assure P and Q have the same size and are finite in all values
Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size.");
if(!P.allFinite() || !Q.allFinite()){
Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf.");
}
Scalar dist = 0.0;
Scalar PQratio = 0.0;
//normalize to 1
P /= P.sum();
Q /= Q.sum();
Assert::isNear((double)P.sum(), 1.0, 0.01,"Normalization failed.. this shouldn't happen");
Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization failed.. this shouldn't happen");
//sum up the logarithmic difference between P and Q, also called kullback leibler...
for(int i = 0; i < P.size(); ++i){
// if both prob are near zero we assume a 0 distance since lim->0 = 0
if((P[i] == 0.0) && (Q[i] == 0.0)){
dist += 0.0;
}
//if prob of P is 0 we also assume a 0 distance since lim->0 0 * log(0) = 0
else if ((P[i] == 0.0) && (Q[i] > 0.0)){
dist += 0.0;
} else{
// calc PQratio
if(Q[i] == 0.0){
Assert::doThrow("Division by zero is not allowed ;).");
//PQratio = P[i] / 0.00001;
} else {
PQratio = P[i] / Q[i];
}
//use log for dist
if (mode == NATURALIS){
dist += P[i] * log(PQratio);
}
else if (mode == BASE2){
dist += P[i] * log2(PQratio);
}
else if (mode == BASE10){
dist += P[i] * log10(PQratio);
}
}
}
return dist;
}
static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode);
}
/** Calculate the Kullback Leibler Distance for a univariate Gaussian distribution
* Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/
*/
static inline Scalar getUnivariateGauss(Distribution::Normal<Scalar> norm1, Distribution::Normal<Scalar> norm2){
auto sigma1Quad = norm1.getSigma() * norm1.getSigma();
auto sigma2Quad = norm2.getSigma() * norm2.getSigma();
auto mu12Quad = (norm1.getMu() - norm2.getMu()) * (norm1.getMu() - norm2.getMu());
auto log1 = std::log(norm1.getSigma());
auto log2 = std::log(norm2.getSigma());
// kl = log(sigma_2 / sigma_1) + ((sigma_1^2 + (mu_1 - mu_2)^2) / 2 * sigma_2^2) - 0.5
double klb = (log2 - log1) + ((sigma1Quad + mu12Quad)/(2.0 * sigma2Quad)) - 0.5;
//klb is always greater 0
if(klb < 0.0){
Assert::doThrow("The Kullback Leibler Distance is < 0! Thats not possible");
}
return klb;
}
/** Calculate the Kullback Leibler Distance for a univariate Gaussian distribution symmetric*/
static inline Scalar getUnivariateGaussSymmetric(Distribution::Normal<Scalar> norm1, Distribution::Normal<Scalar> norm2){
return getUnivariateGauss(norm1, norm2) + getUnivariateGauss(norm2, norm1);
}
/** Calculate the Kullback Leibler Distance for a multivariate Gaussian distribution */
static inline Scalar getMultivariateGauss(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2){
//both gaussian have the same dimension.
Assert::equal(norm1.getMu().rows(), norm2.getMu().rows(), "mean vectors do not have the same dimension");
Assert::equal(norm1.getSigma().rows(), norm2.getSigma().rows(), "cov matrices do not have the same dimension");
Assert::equal(norm1.getSigma().cols(), norm2.getSigma().cols(), "cov matrices do not have the same dimension");
//log
auto det1 = norm1.getSigma().determinant();
auto det2 = norm2.getSigma().determinant();
auto log1 = std::log(det1);
auto log2 = std::log(det2);
//determinate shouldn't be 0!
Assert::isNot0(det1, "Determinat of the first Gauss is Zero! Check the Cov Matrix.");
Assert::isNot0(det2, "Determinat of the second Gauss is Zero! Check the Cov Matrix.");
//trace
Eigen::MatrixXd toTrace(norm1.getSigma().rows(),norm1.getSigma().cols());
toTrace = norm2.getSigmaInv() * norm1.getSigma();
auto trace = toTrace.trace();
//transpose
Eigen::VectorXd toTranspose(norm1.getMu().rows());
toTranspose = norm2.getMu() - norm1.getMu();
auto transpose = toTranspose.transpose();
//rawdensity
auto rawDensity = transpose * norm2.getSigmaInv() * toTranspose;
auto dimension = norm1.getMu().rows();
//0.5 * ((log(det(cov_2)/det(cov_1)) + tr(cov_2^-1 cov_1) + (mu_2 - mu_1)^T * cov_2^-1 * (mu_2 - mu_1) - dimension)
double klb = 0.5 * ((log2 - log1) + trace + rawDensity - dimension);
//klb is always greater 0
if(klb < 0.0){
Assert::doThrow("The Kullback Leibler Distance is < 0! Thats not possible");
}
Assert::isNotNaN(klb, "The Kullback Leibler Distance is NaN!");
return klb;
}
/** Calculate the Kullback Leibler Distance for a multivariate Gaussian distribution symmetric*/
static inline Scalar getMultivariateGaussSymmetric(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2){
return getMultivariateGauss(norm1, norm2) + getMultivariateGauss(norm2, norm1);
}
};
}
#endif // KULLBACKLEIBLER_H