#ifndef KULLBACKLEIBLER_H
#define KULLBACKLEIBLER_H

#include "../distribution/Normal.h"
#include "../distribution/NormalN.h"
#include "../../Assertions.h"
#include <Eigen/Dense>
#include <cmath>

namespace Divergence {

enum LOGMODE { BASE2, BASE10, NATURALIS };

template <typename Scalar>
class KullbackLeibler {
public:

    /** Calculate the Kullback-Leibler divergence from a set of sample densities.
     * Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
     * @param P is the vector containing the densities of a set of samples
     * @param Q is the vector containing the densities of the same sample set as P
     * @param mode selects the logarithm base used for the divergence
     */
    static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode) {
        // Ensure P and Q have the same size and are finite in all values.
        Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size.");
        if (!P.allFinite() || !Q.allFinite()) {
            Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf.");
        }

        Scalar dist = 0.0;
        Scalar PQratio = 0.0;

        // Normalize both vectors so they each sum to 1.
        P /= P.sum();
        Q /= Q.sum();
        Assert::isNear((double)P.sum(), 1.0, 0.01, "Normalization failed.. this shouldn't happen");
        Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization failed.. this shouldn't happen");

        // Sum up the weighted logarithmic difference between P and Q,
        // i.e. the Kullback-Leibler divergence sum_i P[i] * log(P[i] / Q[i]).
        for (int i = 0; i < P.size(); ++i) {
            // If both probabilities are zero we add a 0 contribution, since the limit is 0.
            if ((P[i] == 0.0) && (Q[i] == 0.0)) {
                dist += 0.0;
            }
            // If P[i] is 0 we also add a 0 contribution, since lim_{p->0} p * log(p) = 0.
            else if ((P[i] == 0.0) && (Q[i] > 0.0)) {
                dist += 0.0;
            }
            else {
                // Q[i] == 0 with P[i] > 0 makes the true divergence infinite;
                // regularize with a small epsilon instead of throwing.
                if (Q[i] == 0.0) {
                    PQratio = P[i] / 0.00001;
                } else {
                    PQratio = P[i] / Q[i];
                }
                // Accumulate using the logarithm base selected by mode.
                if (mode == NATURALIS) {
                    dist += P[i] * std::log(PQratio);
                } else if (mode == BASE2) {
                    dist += P[i] * std::log2(PQratio);
                } else if (mode == BASE10) {
                    dist += P[i] * std::log10(PQratio);
                }
            }
        }
        return dist;
    }

    /** Symmetrized Kullback-Leibler divergence from sample densities (sum of both directions). */
    static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode) {
        return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode);
    }

    /** Calculate the Kullback-Leibler divergence between two univariate Gaussian distributions.
     * Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/
     */
    static inline Scalar getUnivariateGauss(Distribution::Normal norm1, Distribution::Normal norm2) {
        auto sigma1Quad = norm1.getSigma() * norm1.getSigma();
        auto sigma2Quad = norm2.getSigma() * norm2.getSigma();
        auto mu12Quad = (norm1.getMu() - norm2.getMu()) * (norm1.getMu() - norm2.getMu());
        auto log1 = std::log(norm1.getSigma());
        auto log2 = std::log(norm2.getSigma());
        // kl = log(sigma_2 / sigma_1) + (sigma_1^2 + (mu_1 - mu_2)^2) / (2 * sigma_2^2) - 0.5
        double klb = (log2 - log1) + ((sigma1Quad + mu12Quad) / (2.0 * sigma2Quad)) - 0.5;
        // The Kullback-Leibler divergence is always >= 0.
        if (klb < 0.0) {
            Assert::doThrow("The Kullback Leibler Distance is < 0! That's not possible.");
        }
        return klb;
    }

    /** Symmetrized Kullback-Leibler divergence for univariate Gaussians (sum of both directions). */
    static inline Scalar getUnivariateGaussSymmetric(Distribution::Normal norm1, Distribution::Normal norm2) {
        return getUnivariateGauss(norm1, norm2) + getUnivariateGauss(norm2, norm1);
    }
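    /* Usage sketch for the sample-based and univariate variants. Illustrative
     * only: the Distribution::Normal constructor is assumed here to take
     * (mu, sigma); adapt to the actual interface declared in Normal.h.
     *
     *   Eigen::VectorXd P(3), Q(3);
     *   P << 0.2, 0.5, 0.3;
     *   Q << 0.1, 0.6, 0.3;
     *   double dSamples = KullbackLeibler<double>::getGeneralFromSamples(P, Q, NATURALIS);
     *
     *   Distribution::Normal n1(0.0, 1.0);   // hypothetical (mu, sigma) ctor
     *   Distribution::Normal n2(1.0, 2.0);
     *   double dGauss = KullbackLeibler<double>::getUnivariateGauss(n1, n2);
     */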
    /** Calculate the Kullback-Leibler divergence between two multivariate Gaussian distributions. */
    static inline Scalar getMultivariateGauss(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2) {
        // Both Gaussians must have the same dimension.
        Assert::equal(norm1.getMu().rows(), norm2.getMu().rows(), "mean vectors do not have the same dimension");
        Assert::equal(norm1.getSigma().rows(), norm2.getSigma().rows(), "cov matrices do not have the same dimension");
        Assert::equal(norm1.getSigma().cols(), norm2.getSigma().cols(), "cov matrices do not have the same dimension");

        // Log of the covariance determinants; check for zero before taking the log.
        auto det1 = norm1.getSigma().determinant();
        auto det2 = norm2.getSigma().determinant();
        Assert::isNot0(det1, "Determinant of the first Gauss is zero! Check the cov matrix.");
        Assert::isNot0(det2, "Determinant of the second Gauss is zero! Check the cov matrix.");
        auto log1 = std::log(det1);
        auto log2 = std::log(det2);

        // Trace of cov_2^-1 * cov_1.
        Eigen::MatrixXd toTrace = norm2.getSigmaInv() * norm1.getSigma();
        auto trace = toTrace.trace();

        // Difference of the means.
        Eigen::VectorXd muDiff = norm2.getMu() - norm1.getMu();

        // Quadratic form (mu_2 - mu_1)^T * cov_2^-1 * (mu_2 - mu_1);
        // .value() extracts the scalar from the 1x1 result.
        double rawDensity = (muDiff.transpose() * norm2.getSigmaInv() * muDiff).value();

        auto dimension = norm1.getMu().rows();

        // kl = 0.5 * (log(det(cov_2) / det(cov_1)) + tr(cov_2^-1 * cov_1)
        //             + (mu_2 - mu_1)^T * cov_2^-1 * (mu_2 - mu_1) - dimension)
        double klb = 0.5 * ((log2 - log1) + trace + rawDensity - dimension);

        // The Kullback-Leibler divergence is always >= 0.
        if (klb < 0.0) {
            Assert::doThrow("The Kullback Leibler Distance is < 0! That's not possible.");
        }
        Assert::isNotNaN(klb, "The Kullback Leibler Distance is NaN!");
        return klb;
    }

    /** Symmetrized Kullback-Leibler divergence for multivariate Gaussians (sum of both directions). */
    static inline Scalar getMultivariateGaussSymmetric(Distribution::NormalDistributionN norm1, Distribution::NormalDistributionN norm2) {
        return getMultivariateGauss(norm1, norm2) + getMultivariateGauss(norm2, norm1);
    }
};

} // namespace Divergence

#endif // KULLBACKLEIBLER_H
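/* Usage sketch for the multivariate variant. Illustrative only: the
 * Distribution::NormalDistributionN constructor is assumed here to take
 * (mu, cov); adapt to the actual interface declared in NormalN.h.
 *
 *   Eigen::VectorXd mu1 = Eigen::VectorXd::Zero(2);
 *   Eigen::VectorXd mu2(2);
 *   mu2 << 1.0, -1.0;
 *   Eigen::MatrixXd cov1 = Eigen::MatrixXd::Identity(2, 2);
 *   Eigen::MatrixXd cov2 = 2.0 * Eigen::MatrixXd::Identity(2, 2);
 *   Distribution::NormalDistributionN n1(mu1, cov1);  // hypothetical (mu, cov) ctor
 *   Distribution::NormalDistributionN n2(mu2, cov2);
 *   double d = Divergence::KullbackLeibler<double>::getMultivariateGauss(n1, n2);
 */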