2017-03-28 21:03:50 +02:00
5 changed files with 235 additions and 2 deletions

View File: KernelDensity.h

@@ -0,0 +1,35 @@
#ifndef KERNELDENSITY_H
#define KERNELDENSITY_H

#include <cmath>
#include <random>
#include <functional>
#include <eigen3/Eigen/Dense>

#include "../../Assertions.h"
#include "../Random.h"

namespace Distribution {

	/** density backed by a user-supplied probability function, e.g. a kernel density estimate */
	template <typename T, typename Sample> class KernelDensity{

	private:

		/** maps a sample to its (estimated) density */
		const std::function<T(Sample)> probabilityFunction;

	public:

		/** ctor with the density function to wrap */
		KernelDensity(const std::function<T(Sample)> probabilityFunction) : probabilityFunction(probabilityFunction){
		}

		/** evaluate the wrapped density for the given sample */
		T getProbability(const Sample& sample) const {
			return probabilityFunction(sample);
		}

	};

}

#endif // KERNELDENSITY_H
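A minimal usage sketch of the new class; the standard-normal density below is a made-up example, not part of this commit:

#include <cmath>
#include "KernelDensity.h"

int main() {
	// hypothetical example: wrap a standard-normal pdf over double samples
	const double twoPi = 6.283185307179586;
	Distribution::KernelDensity<double, double> kd([twoPi](double x) {
		return std::exp(-0.5 * x * x) / std::sqrt(twoPi);
	});
	double p = kd.getProbability(0.0);	// approx. 0.3989
	return (p > 0.39 && p < 0.40) ? 0 : 1;
}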

View File: NormalDistributionN.h

@@ -15,8 +15,8 @@ namespace Distribution {
 	private:
-		const Eigen::VectorXd mu;
-		const Eigen::MatrixXd sigma;
+		Eigen::VectorXd mu;
+		Eigen::MatrixXd sigma;
 		const double _a;
 		const Eigen::MatrixXd _sigmaInv;
@@ -61,6 +61,14 @@ namespace Distribution {
 			return this->_sigmaInv;
 		}
+
+		void setSigma(Eigen::MatrixXd sigma){
+			this->sigma = sigma;
+		}
+
+		void setMu(Eigen::VectorXd mu){
+			this->mu = mu;
+		}
 
 		/** return a NormalN based on given data */
 		static NormalDistributionN getNormalNFromSamples(const Eigen::MatrixXd& data) {
View File: JensenShannon.h

@@ -0,0 +1,29 @@
#ifndef JENSENSHANNON_H
#define JENSENSHANNON_H

#include "KullbackLeibler.h"
#include "../../Assertions.h"
#include <string>

namespace Divergence {

	template <typename Scalar> class JensenShannon {

	public:

		/** Calculate the Jensen–Shannon divergence from a set of sample densities
		 * Info: https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
		 * @param P is the vector containing the densities of a set of samples
		 * @param Q is the vector containing the densities of the same sample set as P
		 */
		static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
			// M is the average (mixture) of both distributions
			Eigen::VectorXd M = 0.5 * (P + Q);
			return (0.5 * KullbackLeibler<Scalar>::getGeneralFromSamples(P, M, mode)) + (0.5 * KullbackLeibler<Scalar>::getGeneralFromSamples(Q, M, mode));
		}

	};

}

#endif // JENSENSHANNON_H
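A minimal usage sketch; the density vectors are made up. With BASE2 the Jensen–Shannon divergence is bounded by [0, 1]:

	Eigen::VectorXd P(3);
	P << 0.2, 0.5, 0.3;
	Eigen::VectorXd Q(3);
	Q << 0.1, 0.4, 0.5;

	// symmetric by construction: swapping P and Q yields the same value
	double jsd = Divergence::JensenShannon<double>::getGeneralFromSamples(P, Q, Divergence::LOGMODE::BASE2);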

View File: KullbackLeibler.h

@@ -9,10 +9,82 @@
namespace Divergence {

	/** base of the logarithm used for the divergence */
	enum LOGMODE{
		BASE2,		// result in bits
		BASE10,		// result in hartleys
		NATURALIS	// result in nats
	};

	template <typename Scalar> class KullbackLeibler {

	public:

		/** Calculate the Kullback–Leibler divergence from a set of sample densities
		 * Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
		 * @param P is the vector containing the densities of a set of samples
		 * @param Q is the vector containing the densities of the same sample set as P
		 */
		static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){

			// assure P and Q have the same size and are finite in all values
			Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size.");
			if(!P.allFinite() || !Q.allFinite()){
				Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf.");
			}

			Scalar dist = 0.0;
			Scalar PQratio = 0.0;

			// normalize both vectors to sum to 1 so they form proper probability distributions
			P /= P.sum();
			Q /= Q.sum();
			Assert::isNear((double)P.sum(), 1.0, 0.01, "Normalization failed; this should not happen.");
			Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization failed; this should not happen.");

			// sum up the weighted logarithmic difference between P and Q: the Kullback–Leibler divergence
			for(int i = 0; i < P.size(); ++i){

				// both probabilities are zero: contributes nothing, since lim x->0 of x*log(x/x) is 0
				if((P[i] == 0.0) && (Q[i] == 0.0)){
					dist += 0.0;
				}
				// P is zero: contributes nothing as well, since lim x->0 of x*log(x/Q[i]) is 0
				else if ((P[i] == 0.0) && (Q[i] > 0.0)){
					dist += 0.0;
				} else {

					// P[i] > 0 while Q[i] == 0 would make the divergence infinite
					if(Q[i] == 0.0){
						Assert::doThrow("Q[i] is zero while P[i] is not: division by zero, the divergence is infinite.");
					} else {
						PQratio = P[i] / Q[i];
					}

					// accumulate using the logarithm of the requested base
					if (mode == NATURALIS){
						dist += P[i] * log(PQratio);
					} else if (mode == BASE2){
						dist += P[i] * log2(PQratio);
					} else if (mode == BASE10){
						dist += P[i] * log10(PQratio);
					}
				}
			}

			return dist;
		}
		/** Symmetric variant: KL(P||Q) + KL(Q||P), also known as the Jeffreys divergence */
		static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){
			return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode);
		}

		/** Calculate the Kullback–Leibler divergence between two univariate Gaussian distributions
		 * Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/
		 */
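The body of this univariate helper is cut off in this view. For reference, a sketch of the standard closed form from the linked article (not necessarily this file's exact implementation; the input values below are placeholders), in nats:

	// inputs: means mu1, mu2 and standard deviations s1, s2 of the two Gaussians
	double mu1 = 0.0, s1 = 1.0;
	double mu2 = 1.0, s2 = 2.0;

	// KL( N(mu1, s1^2) || N(mu2, s2^2) ), natural logarithm
	double kl = std::log(s2 / s1)
	          + (s1 * s1 + (mu1 - mu2) * (mu1 - mu2)) / (2.0 * s2 * s2)
	          - 0.5;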

View File

@@ -211,4 +211,93 @@ TEST(KullbackLeibler, multivariateGaussGeCov) {
	ASSERT_GE(kld34sym, kld12sym);
}
TEST(KullbackLeibler, generalFromSamples) {

	// four Gaussians with identical means; norm4 gets a wider covariance than the others
	Eigen::VectorXd mu1(2);
	mu1[0] = 1.0;
	mu1[1] = 1.0;
	Eigen::VectorXd mu2(2);
	mu2[0] = 1.0;
	mu2[1] = 1.0;
	Eigen::VectorXd mu3(2);
	mu3[0] = 1.0;
	mu3[1] = 1.0;
	Eigen::VectorXd mu4(2);
	mu4[0] = 1.0;
	mu4[1] = 1.0;

	Eigen::MatrixXd cov1(2,2);
	cov1(0,0) = 1.0;	cov1(0,1) = 0.0;
	cov1(1,0) = 0.0;	cov1(1,1) = 1.0;
	Eigen::MatrixXd cov2(2,2);
	cov2(0,0) = 1.0;	cov2(0,1) = 0.0;
	cov2(1,0) = 0.0;	cov2(1,1) = 1.0;
	Eigen::MatrixXd cov3(2,2);
	cov3(0,0) = 1.0;	cov3(0,1) = 0.0;
	cov3(1,0) = 0.0;	cov3(1,1) = 1.0;
	Eigen::MatrixXd cov4(2,2);
	cov4(0,0) = 3.0;	cov4(0,1) = 0.0;
	cov4(1,0) = 0.0;	cov4(1,1) = 1.0;

	Distribution::NormalDistributionN norm1(mu1, cov1);
	Distribution::NormalDistributionN norm2(mu2, cov2);
	Distribution::NormalDistributionN norm3(mu3, cov3);
	Distribution::NormalDistributionN norm4(mu4, cov4);

	int size = 10000;
	Eigen::VectorXd samples1(size);
	Eigen::VectorXd samples2(size);
	Eigen::VectorXd samples3(size);
	Eigen::VectorXd samples4(size);
	// random number generator with a time-dependent seed
	std::mt19937_64 rng;
	uint64_t timeSeed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
	std::seed_seq ss{uint32_t(timeSeed & 0xffffffff), uint32_t(timeSeed>>32)};
	rng.seed(ss);

	// uniform distribution between -9 and 10 to draw evaluation points from
	std::uniform_real_distribution<double> unif(-9, 10);
	// evaluate all four densities at the same randomly drawn points
	for(int i = 0; i < size; ++i){
		double r1 = unif(rng);
		double r2 = unif(rng);
		Eigen::VectorXd v(2);
		v << r1, r2;
		samples1[i] = norm1.getProbability(v);
		samples2[i] = norm2.getProbability(v);
		samples3[i] = norm3.getProbability(v);
		samples4[i] = norm4.getProbability(v);
	}
	// the pair with the differing covariance must be at least as divergent as the identical pair
	double kld12 = Divergence::KullbackLeibler<float>::getGeneralFromSamples(samples1, samples2, Divergence::LOGMODE::NATURALIS);
	double kld34 = Divergence::KullbackLeibler<float>::getGeneralFromSamples(samples3, samples4, Divergence::LOGMODE::NATURALIS);
	std::cout << kld34 << " > " << kld12 << std::endl;

	double kld12sym = Divergence::KullbackLeibler<float>::getGeneralFromSamplesSymmetric(samples1, samples2, Divergence::LOGMODE::NATURALIS);
	double kld34sym = Divergence::KullbackLeibler<float>::getGeneralFromSamplesSymmetric(samples3, samples4, Divergence::LOGMODE::NATURALIS);
	std::cout << kld34sym << " > " << kld12sym << std::endl;

	ASSERT_GE(kld34, kld12);
	ASSERT_GE(kld34sym, kld12sym);

}
#endif