#ifndef KULLBACKLEIBLER_H
#define KULLBACKLEIBLER_H

#include "../distribution/Normal.h"
#include "../distribution/NormalN.h"

#include "../../Assertions.h"
#include <cmath>
#include <string>

namespace Divergence {

    /** Selects the logarithm base used for the divergence. */
    enum LOGMODE{
        BASE2,      // result in bits
        BASE10,     // result in bans (hartleys)
        NATURALIS   // result in nats (natural logarithm)
    };

    template <typename Scalar> class KullbackLeibler {

    public:

        /** Calculate the Kullback-Leibler divergence from a set of sample densities.
         * Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
         * @param P is the vector containing the densities of a set of samples
         * @param Q is the vector containing the densities of the same sample set as P
         * @param mode selects the logarithm base
         */
        static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){

            // ensure P and Q have the same size and are finite in all entries
            Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size.");
            if(!P.allFinite() || !Q.allFinite()){
                Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf.");
            }

            Scalar dist = 0.0;
            Scalar PQratio = 0.0;

            // normalize both vectors so they sum to 1
            P /= P.sum();
            Q /= Q.sum();

            Assert::isNear((double)P.sum(), 1.0, 0.01, "Normalization of P failed; this should not happen.");
            Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization of Q failed; this should not happen.");

            // sum up the weighted logarithmic difference between P and Q (the Kullback-Leibler divergence)
            for(int i = 0; i < P.size(); ++i){

                // if both probabilities are zero we assume a zero contribution, since the limit is 0
                if((P[i] == 0.0) && (Q[i] == 0.0)){
                    dist += 0.0;
                }
                // if P[i] is zero we also assume a zero contribution, since lim_{p->0} p * log(p/q) = 0
                else if ((P[i] == 0.0) && (Q[i] > 0.0)){
                    dist += 0.0;
                } else {

                    // calculate the ratio P[i] / Q[i]
                    if(Q[i] == 0.0){
                        Assert::doThrow("Division by zero is not allowed.");
                        //PQratio = P[i] / 0.00001;
                    } else {
                        PQratio = P[i] / Q[i];
                    }

                    // accumulate using the logarithm base selected by mode
                    if (mode == NATURALIS){
                        dist += P[i] * std::log(PQratio);
                    }
                    else if (mode == BASE2){
                        dist += P[i] * std::log2(PQratio);
                    }
                    else if (mode == BASE10){
                        dist += P[i] * std::log10(PQratio);
                    }
                }
            }

            return dist;
        }

        /** Symmetric Kullback-Leibler divergence from sample densities: KL(P||Q) + KL(Q||P). */
        static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
            return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode);
        }

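        /* A minimal usage sketch (illustration only, not part of the original file): P and Q hold
         * densities of the same sample set; they are normalized internally, so they do not need to
         * sum to 1 beforehand.
         *
         *   Eigen::VectorXd p(3), q(3);
         *   p << 0.2, 0.5, 0.3;
         *   q << 0.1, 0.6, 0.3;
         *   double kl  = Divergence::KullbackLeibler<double>::getGeneralFromSamples(p, q, Divergence::NATURALIS);
         *   double skl = Divergence::KullbackLeibler<double>::getGeneralFromSamplesSymmetric(p, q, Divergence::NATURALIS);
         */
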
        /** Calculate the Kullback-Leibler divergence between two univariate Gaussian distributions.
         * Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/
         */
        static inline Scalar getUnivariateGauss(Distribution::Normal<Scalar> norm1, Distribution::Normal<Scalar> norm2){

            auto sigma1Quad = norm1.getSigma() * norm1.getSigma();
            auto sigma2Quad = norm2.getSigma() * norm2.getSigma();
            auto mu12Quad = (norm1.getMu() - norm2.getMu()) * (norm1.getMu() - norm2.getMu());
            auto logSigma1 = std::log(norm1.getSigma());
            auto logSigma2 = std::log(norm2.getSigma());

            // kl = log(sigma_2 / sigma_1) + ((sigma_1^2 + (mu_1 - mu_2)^2) / (2 * sigma_2^2)) - 0.5
            Scalar klb = (logSigma2 - logSigma1) + ((sigma1Quad + mu12Quad) / (2.0 * sigma2Quad)) - 0.5;

            // the Kullback-Leibler divergence is always >= 0
            if(klb < 0.0){
                Assert::doThrow("The Kullback-Leibler divergence is < 0! That is not possible.");
            }
            return klb;
        }

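        /* Worked example (illustrative): for N(0, 1) and N(1, 1) we have sigma_1 = sigma_2 = 1 and
         * (mu_1 - mu_2)^2 = 1, so
         *   kl = log(1) + (1 + 1) / (2 * 1) - 0.5 = 0.5,
         * and the symmetric variant below yields 0.5 + 0.5 = 1.0.
         */
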
        /** Symmetric Kullback-Leibler divergence between two univariate Gaussian distributions. */
        static inline Scalar getUnivariateGaussSymmetric(Distribution::Normal<Scalar> norm1, Distribution::Normal<Scalar> norm2){
            return getUnivariateGauss(norm1, norm2) + getUnivariateGauss(norm2, norm1);
        }

        /** Calculate the Kullback-Leibler divergence between two multivariate Gaussian distributions. */
        static inline Scalar getMultivariateGauss(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2){

            // both Gaussians must have the same dimension
            Assert::equal(norm1.getMu().rows(), norm2.getMu().rows(), "The mean vectors do not have the same dimension.");
            Assert::equal(norm1.getSigma().rows(), norm2.getSigma().rows(), "The covariance matrices do not have the same dimension.");
            Assert::equal(norm1.getSigma().cols(), norm2.getSigma().cols(), "The covariance matrices do not have the same dimension.");

            // determinants of both covariance matrices
            auto det1 = norm1.getSigma().determinant();
            auto det2 = norm2.getSigma().determinant();

            // the determinants must not be 0, otherwise the logarithm is undefined
            Assert::isNot0(det1, "Determinant of the first Gaussian is zero! Check the covariance matrix.");
            Assert::isNot0(det2, "Determinant of the second Gaussian is zero! Check the covariance matrix.");

            auto logDet1 = std::log(det1);
            auto logDet2 = std::log(det2);

            // trace term: tr(cov_2^-1 * cov_1)
            Eigen::MatrixXd toTrace = norm2.getSigmaInv() * norm1.getSigma();
            auto trace = toTrace.trace();

            // difference of the mean vectors
            Eigen::VectorXd muDiff = norm2.getMu() - norm1.getMu();

            // quadratic term: (mu_2 - mu_1)^T * cov_2^-1 * (mu_2 - mu_1), reduced to a scalar via dot()
            Scalar rawDensity = muDiff.dot(norm2.getSigmaInv() * muDiff);
            auto dimension = norm1.getMu().rows();

            // kl = 0.5 * (log(det(cov_2) / det(cov_1)) + tr(cov_2^-1 * cov_1)
            //             + (mu_2 - mu_1)^T * cov_2^-1 * (mu_2 - mu_1) - dimension)
            Scalar klb = 0.5 * ((logDet2 - logDet1) + trace + rawDensity - dimension);

            // the Kullback-Leibler divergence is always >= 0
            if(klb < 0.0){
                Assert::doThrow("The Kullback-Leibler divergence is < 0! That is not possible.");
            }
            Assert::isNotNaN(klb, "The Kullback-Leibler divergence is NaN!");

            return klb;
        }

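        /* Worked example (illustrative): if both Gaussians share the identity covariance,
         * Sigma_1 = Sigma_2 = I, then the log-determinant term is 0, the trace term equals the
         * dimension, and the formula collapses to
         *   kl = 0.5 * (mu_2 - mu_1)^T * (mu_2 - mu_1),
         * i.e. half the squared Euclidean distance between the two means.
         */
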
        /** Symmetric Kullback-Leibler divergence between two multivariate Gaussian distributions. */
        static inline Scalar getMultivariateGaussSymmetric(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2){
            return getMultivariateGauss(norm1, norm2) + getMultivariateGauss(norm2, norm1);
        }

    };

}

#endif // KULLBACKLEIBLER_H