diff --git a/math/distribution/KernelDensity.h b/math/distribution/KernelDensity.h
new file mode 100644
index 0000000..a103e3e
--- /dev/null
+++ b/math/distribution/KernelDensity.h
@@ -0,0 +1,35 @@
+#ifndef KERNELDENSITY_H
+#define KERNELDENSITY_H
+
+#include <functional>
+
+#include "../../Assertions.h"
+#include "../Random.h"
+
+
+namespace Distribution {
+
+	/** wraps an arbitrary probability/density function behind a common interface */
+	template <typename T, typename Sample> class KernelDensity {
+
+	private:
+
+		const std::function<T(Sample)> probabilityFunction;
+
+
+	public:
+
+		KernelDensity(const std::function<T(Sample)> probabilityFunction) : probabilityFunction(probabilityFunction) {
+
+		}
+
+		T getProbability(Sample sample) {
+			return probabilityFunction(sample);
+		}
+
+	};
+
+}
+
+#endif // KERNELDENSITY_H
diff --git a/math/distribution/NormalN.h b/math/distribution/NormalN.h
index d98c4d9..bb332c4 100644
--- a/math/distribution/NormalN.h
+++ b/math/distribution/NormalN.h
@@ -15,8 +15,8 @@ namespace Distribution {
 
 	private:
 
-		const Eigen::VectorXd mu;
-		const Eigen::MatrixXd sigma;
+		Eigen::VectorXd mu;
+		Eigen::MatrixXd sigma;
 
 		const double _a;
 		const Eigen::MatrixXd _sigmaInv;
@@ -61,6 +61,14 @@ namespace Distribution {
 			return this->_sigmaInv;
 		}
 
+		void setSigma(Eigen::MatrixXd sigma){
+			this->sigma = sigma;
+		}
+
+		void setMu(Eigen::VectorXd mu){
+			this->mu = mu;
+		}
+
 		/** return a NormalN based on given data */
 		static NormalDistributionN getNormalNFromSamples(const Eigen::MatrixXd& data) {
diff --git a/math/divergence/JensenShannon.h b/math/divergence/JensenShannon.h
new file mode 100644
index 0000000..5c9bbb3
--- /dev/null
+++ b/math/divergence/JensenShannon.h
@@ -0,0 +1,29 @@
+#ifndef JENSENSHANNON_H
+#define JENSENSHANNON_H
+
+#include "KullbackLeibler.h"
+
+#include "../../Assertions.h"
+#include <Eigen/Dense>
+
+
+namespace Divergence {
+
+template <typename Scalar> class JensenShannon {
+
+public:
+	/** Calculate the Jensen-Shannon divergence from a set of sample densities.
+	 * Info: https://en.wikipedia.org/wiki/Jensen–Shannon_divergence
+	 * @param P is the vector containing the densities of a set of samples
+	 * @param Q is a vector containing the densities of the same sample set as P
+	 * @param mode selects the logarithm base used by the underlying Kullback-Leibler computation
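+	 * Note: the result is JSD(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M), where M = 0.5 * (P + Q).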
+	 */
+	static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
+
+		// element-wise average of both densities
+		Eigen::VectorXd M = 0.5 * (P + Q);
+
+		return (0.5 * KullbackLeibler<Scalar>::getGeneralFromSamples(P, M, mode)) + (0.5 * KullbackLeibler<Scalar>::getGeneralFromSamples(Q, M, mode));
+	}
+
+};
+
+}
+
+#endif // JENSENSHANNON_H
diff --git a/math/divergence/KullbackLeibler.h b/math/divergence/KullbackLeibler.h
index 1ae4433..630e5d2 100644
--- a/math/divergence/KullbackLeibler.h
+++ b/math/divergence/KullbackLeibler.h
@@ -9,10 +9,82 @@
 
 namespace Divergence {
 
+	/** selects the logarithm base used by the divergence functions */
+	enum LOGMODE{
+		BASE2,
+		BASE10,
+		NATURALIS
+	};
+
 	template <typename Scalar> class KullbackLeibler {
 
 	public:
 
+		/** Calculate the Kullback-Leibler divergence from a set of sample densities.
+		 * Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
+		 * @param P is the vector containing the densities of a set of samples
+		 * @param Q is a vector containing the densities of the same sample set as P
+		 * @param mode selects the logarithm base (BASE2, BASE10 or NATURALIS)
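+		 * Note: after normalizing P and Q to sum to 1, the result is D_KL(P || Q) = sum_i P[i] * log(P[i] / Q[i]),
+		 *       where terms with P[i] == 0 contribute 0 and Q[i] == 0 with P[i] > 0 raises an exception.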
+		 */
+		static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
+
+			// ensure P and Q have the same size and are finite in all values
+			Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size.");
+			if(!P.allFinite() || !Q.allFinite()){
+				Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf.");
+			}
+
+			Scalar dist = 0.0;
+			Scalar PQratio = 0.0;
+
+			// normalize both density vectors to sum to 1
+			P /= P.sum();
+			Q /= Q.sum();
+
+			Assert::isNear((double)P.sum(), 1.0, 0.01, "Normalization failed. This shouldn't happen.");
+			Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization failed. This shouldn't happen.");
+
+			// sum up the logarithmic difference between P and Q (the Kullback-Leibler divergence)
+			for(int i = 0; i < P.size(); ++i){
+
+				// if both probabilities are zero the contribution is 0 (the limit is 0)
+				if((P[i] == 0.0) && (Q[i] == 0.0)){
+					dist += 0.0;
+				}
+				// if P[i] is zero the contribution is also 0, since lim x->0 of x * log(x) = 0
+				else if ((P[i] == 0.0) && (Q[i] > 0.0)){
+					dist += 0.0;
+				} else {
+
+					// calculate the ratio P[i] / Q[i]
+					if(Q[i] == 0.0){
+						Assert::doThrow("Division by zero is not allowed ;).");
+						//PQratio = P[i] / 0.00001;
+					} else {
+						PQratio = P[i] / Q[i];
+					}
+
+					// accumulate using the logarithm base selected by mode
+					if (mode == NATURALIS){
+						dist += P[i] * log(PQratio);
+					} else if (mode == BASE2){
+						dist += P[i] * log2(PQratio);
+					} else if (mode == BASE10){
+						dist += P[i] * log10(PQratio);
+					}
+
+				}
+			}
+
+			return dist;
+		}
+
+		/** Symmetric variant: KL(P || Q) + KL(Q || P). */
+		static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
+			return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode);
+		}
+
 		/** Calculate the Kullback Leibler Distance for a univariate Gaussian distribution
 		 * Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/
 		 */
diff --git a/tests/math/divergence/TestKullbackLeibler.cpp b/tests/math/divergence/TestKullbackLeibler.cpp
index 0a899f1..6f066cb 100644
--- a/tests/math/divergence/TestKullbackLeibler.cpp
+++ b/tests/math/divergence/TestKullbackLeibler.cpp
@@ -211,4 +211,93 @@ TEST(KullbackLeibler, multivariateGaussGeCov) {
 	ASSERT_GE(kld34sym, kld12sym);
 }
 
+TEST(KullbackLeibler, generalFromSamples) {
+
+	// all four distributions share the same mean; cov4 has a larger variance in its first dimension
+	Eigen::VectorXd mu1(2);
+	mu1[0] = 1.0;
+	mu1[1] = 1.0;
+
+	Eigen::VectorXd mu2(2);
+	mu2[0] = 1.0;
+	mu2[1] = 1.0;
+
+	Eigen::VectorXd mu3(2);
+	mu3[0] = 1.0;
+	mu3[1] = 1.0;
+
+	Eigen::VectorXd mu4(2);
+	mu4[0] = 1.0;
+	mu4[1] = 1.0;
+
+	Eigen::MatrixXd cov1(2,2);
+	cov1(0,0) = 1.0;
+	cov1(0,1) = 0.0;
+	cov1(1,0) = 0.0;
+	cov1(1,1) = 1.0;
+
+	Eigen::MatrixXd cov2(2,2);
+	cov2(0,0) = 1.0;
+	cov2(0,1) = 0.0;
+	cov2(1,0) = 0.0;
+	cov2(1,1) = 1.0;
+
+	Eigen::MatrixXd cov3(2,2);
+	cov3(0,0) = 1.0;
+	cov3(0,1) = 0.0;
+	cov3(1,0) = 0.0;
+	cov3(1,1) = 1.0;
+
+	Eigen::MatrixXd cov4(2,2);
+	cov4(0,0) = 3.0;
+	cov4(0,1) = 0.0;
+	cov4(1,0) = 0.0;
+	cov4(1,1) = 1.0;
+
+	Distribution::NormalDistributionN norm1(mu1, cov1);
+	Distribution::NormalDistributionN norm2(mu2, cov2);
+	Distribution::NormalDistributionN norm3(mu3, cov3);
+	Distribution::NormalDistributionN norm4(mu4, cov4);
+
+	int size = 10000;
+	Eigen::VectorXd samples1(size);
+	Eigen::VectorXd samples2(size);
+	Eigen::VectorXd samples3(size);
+	Eigen::VectorXd samples4(size);
+
+	// random numbers
+	std::mt19937_64 rng;
+	// initialize the random number generator with a time-dependent seed
+	uint64_t timeSeed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+	std::seed_seq ss{uint32_t(timeSeed & 0xffffffff), uint32_t(timeSeed>>32)};
+	rng.seed(ss);
+	// initialize a uniform distribution between -9 and 10
+	std::uniform_real_distribution<double> unif(-9, 10);
+
+	// evaluate all four densities at the same random points
+	for(int i = 0; i < size; ++i){
+
+		double r1 = unif(rng);
+		double r2 = unif(rng);
+		Eigen::VectorXd v(2);
+		v << r1, r2;
+
+		samples1[i] = norm1.getProbability(v);
+		samples2[i] = norm2.getProbability(v);
+		samples3[i] = norm3.getProbability(v);
+		samples4[i] = norm4.getProbability(v);
+	}
+
+	double kld12 = Divergence::KullbackLeibler<double>::getGeneralFromSamples(samples1, samples2, Divergence::LOGMODE::NATURALIS);
+	double kld34 = Divergence::KullbackLeibler<double>::getGeneralFromSamples(samples3, samples4, Divergence::LOGMODE::NATURALIS);
+	std::cout << kld34 << " > " << kld12 << std::endl;
+
+	double kld12sym = Divergence::KullbackLeibler<double>::getGeneralFromSamplesSymmetric(samples1, samples2, Divergence::LOGMODE::NATURALIS);
+	double kld34sym = Divergence::KullbackLeibler<double>::getGeneralFromSamplesSymmetric(samples3, samples4, Divergence::LOGMODE::NATURALIS);
+	std::cout << kld34sym << " > " << kld12sym << std::endl;
+
+	// the pair with differing covariances must show the larger divergence
+	ASSERT_GE(kld34, kld12);
+	ASSERT_GE(kld34sym, kld12sym);
+
+}
+
 #endif
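
Usage note (not part of the diff): a minimal sketch of how the new KernelDensity wrapper could feed the sample-based divergences, assuming the template parameters reconstructed above (KernelDensity taking a return type and a sample type, the divergence classes taking a scalar type) and the headers from this patch being included. The matrix "data" is a hypothetical placeholder for caller-provided samples; everything else mirrors the test above.

	// fit a multivariate normal to some data (hypothetical Eigen::MatrixXd "data")
	// and wrap its density behind the new KernelDensity interface
	Distribution::NormalDistributionN fitted = Distribution::NormalDistributionN::getNormalNFromSamples(data);
	Distribution::KernelDensity<double, Eigen::VectorXd> kd(
		[&fitted](Eigen::VectorXd x){ return fitted.getProbability(x); });

	// a reference distribution to compare against
	Eigen::VectorXd mu(2);    mu  << 0.0, 0.0;
	Eigen::MatrixXd cov(2,2); cov << 1.0, 0.0, 0.0, 1.0;
	Distribution::NormalDistributionN reference(mu, cov);

	// evaluate both densities at the same random points, as in the test above
	const int n = 10000;
	Eigen::VectorXd P(n), Q(n);
	std::mt19937_64 rng(42);
	std::uniform_real_distribution<double> unif(-9, 10);
	for (int i = 0; i < n; ++i) {
		Eigen::VectorXd v(2);
		v << unif(rng), unif(rng);
		P[i] = kd.getProbability(v);
		Q[i] = reference.getProbability(v);
	}

	// compare the two densities with the new sample-based divergences
	double kl  = Divergence::KullbackLeibler<double>::getGeneralFromSamples(P, Q, Divergence::LOGMODE::NATURALIS);
	double jsd = Divergence::JensenShannon<double>::getGeneralFromSamples(P, Q, Divergence::LOGMODE::NATURALIS);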