From 59502931e55556a8ba2d52c9ffd19c758d194f89 Mon Sep 17 00:00:00 2001 From: toni Date: Thu, 23 Mar 2017 19:52:06 +0100 Subject: [PATCH] added kernel density wrapper added general kld solution fixed minor bugs added tests --- main.cpp | 4 +- math/distribution/KernelDensity.h | 35 ++++++++ math/distribution/NormalN.h | 12 ++- math/divergence/KullbackLeibler.h | 67 ++++++++++++++ sensors/offline/FileReader.h | 2 +- sensors/radio/model/WiFiModelLogDistCeiling.h | 6 +- tests/math/divergence/TestKullbackLeibler.cpp | 89 +++++++++++++++++++ 7 files changed, 207 insertions(+), 8 deletions(-) create mode 100644 math/distribution/KernelDensity.h diff --git a/main.cpp b/main.cpp index de06c02..9e42812 100755 --- a/main.cpp +++ b/main.cpp @@ -28,8 +28,8 @@ int main(int argc, char** argv) { //::testing::GTEST_FLAG(filter) = "*LogDistanceCeilingModelBeacon*"; //::testing::GTEST_FLAG(filter) = "*WiFiOptimizer*"; - - ::testing::GTEST_FLAG(filter) = "*Offline.readWrite*"; + ::testing::GTEST_FLAG(filter) = "*KullbackLeibler*"; + //::testing::GTEST_FLAG(filter) = "*Offline.readWrite*"; //::testing::GTEST_FLAG(filter) = "*Barometer*"; //::testing::GTEST_FLAG(filter) = "*GridWalk2RelPressure*"; diff --git a/math/distribution/KernelDensity.h b/math/distribution/KernelDensity.h new file mode 100644 index 0000000..a103e3e --- /dev/null +++ b/math/distribution/KernelDensity.h @@ -0,0 +1,35 @@ +#ifndef KERNELDENSITY_H +#define KERNELDENSITY_H + +#include +#include +#include + +#include + +#include "../../Assertions.h" +#include "../Random.h" + + +namespace Distribution { + + template class KernelDensity{ + + private: + const std::function probabilityFunction; + + + public: + KernelDensity(const std::function probabilityFunction) : probabilityFunction(probabilityFunction){ + + } + + T getProbability(Sample sample){ + return probabilityFunction(sample); + } + + }; + +} + +#endif // KERNELDENSITY_H diff --git a/math/distribution/NormalN.h b/math/distribution/NormalN.h index d98c4d9..bb332c4 100644 --- a/math/distribution/NormalN.h +++ b/math/distribution/NormalN.h @@ -15,8 +15,8 @@ namespace Distribution { private: - const Eigen::VectorXd mu; - const Eigen::MatrixXd sigma; + Eigen::VectorXd mu; + Eigen::MatrixXd sigma; const double _a; const Eigen::MatrixXd _sigmaInv; @@ -61,6 +61,14 @@ namespace Distribution { return this->_sigmaInv; } + void setSigma(Eigen::MatrixXd sigma){ + this->sigma = sigma; + } + + void setMu(Eigen::VectorXd mu){ + this->mu = mu; + } + /** return a NormalN based on given data */ static NormalDistributionN getNormalNFromSamples(const Eigen::MatrixXd& data) { diff --git a/math/divergence/KullbackLeibler.h b/math/divergence/KullbackLeibler.h index 1ae4433..aab08be 100644 --- a/math/divergence/KullbackLeibler.h +++ b/math/divergence/KullbackLeibler.h @@ -9,10 +9,77 @@ namespace Divergence { + enum LOGMODE{ + BASE2, + BASE10, + NATURALIS + }; + template class KullbackLeibler { public: + /** Calculate the Kullback Leibler Distance from a set of sample densities + * Info: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence + * @param P is the vector containing the densities of a set of samples + * @param Q is a vector containg the densities of the same samples set then P + */ + static inline Scalar getGeneralFromSamples(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){ + + // Assure P and Q have the same size and are finite in all values + Assert::equal(P.size(), Q.size(), "The sample vectors P and Q do not have the same size."); + if(!P.allFinite() || !Q.allFinite()){ + Assert::doThrow("The sample vectors P and Q are not finite. Check for NaN or Inf."); + } + + Scalar dist = 0.0; + Scalar PQratio = 0.0; + + //normalize to 1 + P /= P.sum(); + Q /= Q.sum(); + + Assert::isNear((double)P.sum(), 1.0, 0.01,"Normalization failed.. this shouldn't happen"); + Assert::isNear((double)Q.sum(), 1.0, 0.01, "Normalization failed.. this shouldn't happen"); + + //sum up the logarithmic difference between P and Q, also called kullback leibler... + for(int i = 0; i < P.size(); ++i){ + + if((P[i] == 0.0) && (Q[i] == 0.0)){ + dist += 0.0; + } else { + + // calc PQratio + if(Q[i] == 0.0){ + Assert::doThrow("Division by zero is not allowed ;). TODO: What if the densities are to small?"); + //PQratio = P[i] / 0.00001; + } else { + PQratio = P[i] / Q[i]; + } + + //use log for dist + if (mode == NATURALIS){ + dist += P[i] * log(PQratio); + } + + else if (mode == BASE2){ + dist += P[i] * log2(PQratio); + } + + else if (mode == BASE10){ + dist += P[i] * log10(PQratio); + } + + } + } + + return dist; + } + + static inline Scalar getGeneralFromSamplesSymmetric(Eigen::VectorXd P, Eigen::VectorXd Q, LOGMODE mode){ + return getGeneralFromSamples(P, Q, mode) + getGeneralFromSamples(Q, P, mode); + } + /** Calculate the Kullback Leibler Distance for a univariate Gaussian distribution * Info: https://tgmstat.wordpress.com/2013/07/10/kullback-leibler-divergence/ */ diff --git a/sensors/offline/FileReader.h b/sensors/offline/FileReader.h index 99d0fd4..17786fc 100644 --- a/sensors/offline/FileReader.h +++ b/sensors/offline/FileReader.h @@ -193,7 +193,7 @@ namespace Offline { WiFiMeasurements wifi; Splitter s(data, sep); - for (size_t i = 0; i < s.size(); i += 3) { + for (size_t i = 0; i < s.size()-1; i += 3) { const std::string mac = s.get(i+0); const float freq = s.getFloat(i+1); diff --git a/sensors/radio/model/WiFiModelLogDistCeiling.h b/sensors/radio/model/WiFiModelLogDistCeiling.h index f0a7e05..f0e237f 100644 --- a/sensors/radio/model/WiFiModelLogDistCeiling.h +++ b/sensors/radio/model/WiFiModelLogDistCeiling.h @@ -89,9 +89,9 @@ public: void addAP(const MACAddress& accessPoint, const APEntry& params) { // sanity check - Assert::isBetween(params.waf, -99.0f, 0.0f, "WAF out of bounds [-99:0]"); - Assert::isBetween(params.txp, -50.0f, -30.0f, "TXP out of bounds [-50:-30]"); - Assert::isBetween(params.exp, 1.0f, 4.0f, "EXP out of bounds [1:4]"); + //Assert::isBetween(params.waf, -99.0f, 0.0f, "WAF out of bounds [-99:0]"); + //Assert::isBetween(params.txp, -50.0f, -30.0f, "TXP out of bounds [-50:-30]"); + //Assert::isBetween(params.exp, 1.0f, 4.0f, "EXP out of bounds [1:4]"); Assert::equal(accessPoints.find(accessPoint), accessPoints.end(), "AccessPoint already present! VAP-Grouping issue?"); diff --git a/tests/math/divergence/TestKullbackLeibler.cpp b/tests/math/divergence/TestKullbackLeibler.cpp index 0a899f1..6f066cb 100644 --- a/tests/math/divergence/TestKullbackLeibler.cpp +++ b/tests/math/divergence/TestKullbackLeibler.cpp @@ -211,4 +211,93 @@ TEST(KullbackLeibler, multivariateGaussGeCov) { ASSERT_GE(kld34sym, kld12sym); } +TEST(KullbackLeibler, generalFromSamples) { + //ge cov + Eigen::VectorXd mu1(2); + mu1[0] = 1.0; + mu1[1] = 1.0; + + Eigen::VectorXd mu2(2); + mu2[0] = 1.0; + mu2[1] = 1.0; + + Eigen::VectorXd mu3(2); + mu3[0] = 1.0; + mu3[1] = 1.0; + + Eigen::VectorXd mu4(2); + mu4[0] = 1.0; + mu4[1] = 1.0; + + Eigen::MatrixXd cov1(2,2); + cov1(0,0) = 1.0; + cov1(0,1) = 0.0; + cov1(1,0) = 0.0; + cov1(1,1) = 1.0; + + Eigen::MatrixXd cov2(2,2); + cov2(0,0) = 1.0; + cov2(0,1) = 0.0; + cov2(1,0) = 0.0; + cov2(1,1) = 1.0; + + Eigen::MatrixXd cov3(2,2); + cov3(0,0) = 1.0; + cov3(0,1) = 0.0; + cov3(1,0) = 0.0; + cov3(1,1) = 1.0; + + Eigen::MatrixXd cov4(2,2); + cov4(0,0) = 3.0; + cov4(0,1) = 0.0; + cov4(1,0) = 0.0; + cov4(1,1) = 1.0; + + Distribution::NormalDistributionN norm1(mu1, cov1); + Distribution::NormalDistributionN norm2(mu2, cov2); + Distribution::NormalDistributionN norm3(mu3, cov3); + Distribution::NormalDistributionN norm4(mu4, cov4); + + int size = 10000; + Eigen::VectorXd samples1(size); + Eigen::VectorXd samples2(size); + Eigen::VectorXd samples3(size); + Eigen::VectorXd samples4(size); + + //random numbers + std::mt19937_64 rng; + // initialize the random number generator with time-dependent seed + uint64_t timeSeed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + std::seed_seq ss{uint32_t(timeSeed & 0xffffffff), uint32_t(timeSeed>>32)}; + rng.seed(ss); + // initialize a uniform distribution between 0 and 1 + std::uniform_real_distribution unif(-9, 10); + + //generate samples + for(int i = 0; i < size; ++i){ + + double r1 = unif(rng); + double r2 = unif(rng); + Eigen::VectorXd v(2); + v << r1, r2; + + samples1[i] = norm1.getProbability(v); + samples2[i] = norm2.getProbability(v); + samples3[i] = norm3.getProbability(v); + samples4[i] = norm4.getProbability(v); + } + + double kld12 = Divergence::KullbackLeibler::getGeneralFromSamples(samples1, samples2, Divergence::LOGMODE::NATURALIS); + double kld34 = Divergence::KullbackLeibler::getGeneralFromSamples(samples3, samples4, Divergence::LOGMODE::NATURALIS); + std::cout << kld34 << " > " << kld12 << std::endl; + + double kld12sym = Divergence::KullbackLeibler::getGeneralFromSamplesSymmetric(samples1, samples2, Divergence::LOGMODE::NATURALIS); + double kld34sym = Divergence::KullbackLeibler::getGeneralFromSamplesSymmetric(samples3, samples4, Divergence::LOGMODE::NATURALIS); + std::cout << kld34sym << " > " << kld12sym << std::endl; + + ASSERT_GE(kld34, kld12); + ASSERT_GE(kld34sym, kld12sym); + +} + #endif