From a5f2ee6f04d01476cd1031e851ea811025a15a6f Mon Sep 17 00:00:00 2001 From: Frank Ebner Date: Sat, 9 Jan 2016 18:34:03 +0100 Subject: [PATCH] added recent C++ code --- workspace/CMakeLists.txt | 2 +- workspace/main.cpp | 170 +++++++++++++++++++++++++++++++++++++-- workspace/pca/Data.h | 31 +++++-- workspace/pca/Settings.h | 2 +- workspace/pca/TrainPCA.h | 65 +++++++++++---- 5 files changed, 243 insertions(+), 27 deletions(-) diff --git a/workspace/CMakeLists.txt b/workspace/CMakeLists.txt index 25910b4..e3cf354 100644 --- a/workspace/CMakeLists.txt +++ b/workspace/CMakeLists.txt @@ -55,7 +55,7 @@ ADD_DEFINITIONS( -Wall -Werror=return-type -Wextra - #-O2 + -O2 ) endif() diff --git a/workspace/main.cpp b/workspace/main.cpp index 87d92f1..3d026d4 100644 --- a/workspace/main.cpp +++ b/workspace/main.cpp @@ -23,6 +23,10 @@ std::string getClass(const std::vector& nns) { return ""; } +struct ClassStats { + int counts[6] = {}; +}; + struct Stats{ int match; int error; @@ -31,23 +35,178 @@ struct Stats{ float getSum() {return match+error+unknown;} }; + + +std::vector removePatterns(const std::vector& patAll, const std::string& fileName) { + std::vector res; + for (const ClassifiedPattern& pat : patAll) { + if (pat.belongsToFile(fileName)) { + continue; + } else { + res.push_back(pat); + } + } + return res; +} + +template struct PCA { + + aKNN knn; + TrainPCA::Matrices m; + +}; + +class Plot { + + K::Gnuplot gp; + K::GnuplotSplot splot; + K::GnuplotSplotElementLines lines[5]; + + +public: + + Plot() { + for (int i = 0; i < 5; ++i) {lines[i].setColorHex(COLORS[i]);} + for (int i = 0; i < 5; ++i) {splot.add(&lines[i]);} + } + + void add(int idx, std::vector& vec) { + K::GnuplotPoint3 p3(vec[0], vec[1], vec[2]); + lines[idx].add(p3); + } + + void clear() { + for (int i = 0; i < 5; ++i) {lines[i].clear();} + } + + void show() { + gp.setDebugOutput(false); + gp.draw(splot); + gp.flush(); + } + +}; + int main(void) { omp_set_dynamic(false); omp_set_num_threads(3); - const int numFeatures = 3; + const int numFeatures = 10; + TrainPCA::Settings setTrain; + TrainPCA::Settings setClass; setClass.regionStart_ms += 25; + + Data::getAllDataFiles(); + Plot p; + + // convert all provided datasets into patterns + std::vector srcTrain = TrainPCA::getAllData(setTrain); + std::vector srcClass = TrainPCA::getAllData(setClass); + std::cout << "windows: " << srcTrain.size() << std::endl; + + // error calculation + std::unordered_map stats; + std::unordered_map classStats; + //int xx = 0; + std::unordered_map*> pcas; + + // try to classify each pattern + for (const ClassifiedPattern& patClassify : srcClass) { + + // construct knn search for this leave-one-out ONLY ONCE + if (pcas.find(patClassify.fileName) == pcas.end()) { + + std::cout << "constructing PCA for all files but " << patClassify.fileName << std::endl; + + // remove all training patterns belonging to the same source file as the to be classifed pattern + std::vector srcTrainLOO = removePatterns(srcTrain, patClassify.fileName); + + // sanity check (have we removed all patterns?) + int diff = srcTrain.size() - srcTrainLOO.size(); + if (diff < 200) {throw 1;} + + p.clear(); + PCA* pca = new PCA(); + pcas[patClassify.fileName] = pca; + + // train PCA using all pattern without those belonging to the same source file as the to-be-classified one + pca->m = TrainPCA::getMatrices(srcTrainLOO, numFeatures); + + // calculate features and add them to the KNN + for (const ClassifiedPattern& pat : srcTrainLOO) { + K::DynColVector vec = pca->m.A1 * K::PCAHelper::toVector(pat.pattern); + std::vector arr; + for (int i = 0; i < numFeatures; ++i) {arr.push_back(vec(i));} + pca->knn.add(ClassifiedFeature(pat.className, arr)); + + const int idx = Settings::classToInt(pat.className); + p.add(idx, arr); + + } + pca->knn.build(); + //p.show(); + //sleep(100); + + } + + { + + PCA* pca = pcas[patClassify.fileName]; + + // calculate features for the to-be-classified pattern + //const int idx = Settings::classToInt(pat.className); + K::DynColVector vec = pca->m.A1 * K::PCAHelper::toVector(patClassify.pattern); + + // get KNN's answer + std::vector arr; + for (int i = 0; i < numFeatures; ++i) {arr.push_back(vec(i));} + std::vector neighbors = pca->knn.get(arr.data(), 5); + std::string gotClass = getClass(neighbors); + + if (patClassify.className == gotClass) {stats["all"].match++; stats[patClassify.fileName].match++; stats[patClassify.className].match++;} + else if (gotClass == "") {stats["all"].unknown++; stats[patClassify.fileName].unknown++; stats[patClassify.className].unknown++;} + else {stats["all"].error++; stats[patClassify.fileName].error++; stats[patClassify.className].error++;} + + int gotIdx = (gotClass == "") ? (5) : Settings::classToInt(gotClass); + ++classStats[patClassify.className].counts[gotIdx]; + + } + + + } + + for (auto& it : stats) { + Stats& stats = it.second; + std::cout << "'" < patTrain = TrainPCA::getTrainData(); TrainPCA::Matrices m = TrainPCA::getMatrices(patTrain, numFeatures); std::vector patTest = TrainPCA::getTestData(); // construct knn - aKNN knn; + aKNN knn; for (const ClassifiedPattern& pat : patTrain) { K::DynColVector vec = m.A1 * K::PCAHelper::toVector(pat.pattern); - const std::vector arr = {vec(0), vec(1), vec(2)}; + std::vector arr; + for (int i = 0; i < numFeatures; ++i) {arr.push_back(vec(i));} knn.add(ClassifiedFeature(pat.className, arr)); } knn.build(); @@ -65,7 +224,8 @@ int main(void) { K::DynColVector vec = m.A1 * K::PCAHelper::toVector(pat.pattern); // get KNN's answer - std::vector arr = {vec(0), vec(1), vec(2)}; + std::vector arr; + for (int i = 0; i < numFeatures; ++i) {arr.push_back(vec(i));} std::vector neighbors = knn.get(arr.data(), 10); std::string gotClass = getClass(neighbors); @@ -93,7 +253,7 @@ int main(void) { sleep(10000); - +*/ diff --git a/workspace/pca/Data.h b/workspace/pca/Data.h index a454509..d5a90f8 100644 --- a/workspace/pca/Data.h +++ b/workspace/pca/Data.h @@ -7,9 +7,11 @@ #include "../sensors/SensorReader.h" struct ClassifiedPattern { - std::string className; + std::string className; // the class (practice) this pattenr belongs to + std::string fileName; // the file that produced this pattern std::vector pattern; - ClassifiedPattern(const std::string& className, const std::vector& pattern) : className(className), pattern(pattern) {;} + ClassifiedPattern(const std::string& className, const std::string& fileName, const std::vector& pattern) : className(className), fileName(fileName), pattern(pattern) {;} + bool belongsToFile(const std::string& fileName) const {return fileName == this->fileName;} }; struct ClassifiedFeature { @@ -38,6 +40,11 @@ class Data { public: + /** get ALL data files for each practice */ + static std::vector getAllDataFiles() { + return getDataFiles(99999); + } + /** get X data-files for each class */ static std::vector getDataFiles(const int filesPerClass) { @@ -74,14 +81,28 @@ public: for (const auto& val : rec.accel.values) {intAccel.add(val.ts, val.val);} intAccel.makeRelative(); + K::Interpolator intGyro; + for (const auto& val : rec.gyro.values) {intGyro.add(val.ts, val.val);} + intGyro.makeRelative(); + + K::Interpolator intMagnet; + for (const auto& val : rec.magField.values) {intMagnet.add(val.ts, val.val);} + intMagnet.makeRelative(); + + // determine the region's size const int regionEnd_ms = intAccel.values.back().key * regionPercent; - - // construct all sample windows std::vector> samples; for (int center = regionStart_ms; center < regionEnd_ms; center += stepSize_ms) { - std::vector window = getSampleWindow(intAccel, center, windowSize_ms, stepSize_ms); + std::vector window; + + // which sensors to use + std::vector wAccel = getSampleWindow(intAccel, center, windowSize_ms, stepSize_ms); window.insert(window.end(), wAccel.begin(), wAccel.end()); + //std::vector wGyro = getSampleWindow(intGyro, center, windowSize_ms, stepSize_ms); window.insert(window.end(), wGyro.begin(), wGyro.end()); + //std::vector wMagnet = getSampleWindow(intMagnet, center, windowSize_ms, stepSize_ms); window.insert(window.end(), wMagnet.begin(), wMagnet.end()); + + samples.push_back(window); } diff --git a/workspace/pca/Settings.h b/workspace/pca/Settings.h index 6039be4..12aabf2 100644 --- a/workspace/pca/Settings.h +++ b/workspace/pca/Settings.h @@ -7,7 +7,7 @@ class Settings { public: - std::string path = "/mnt/firma/kunden/HandyGames/daten"; + std::string path = "/mnt/firma/kunden/HandyGames/datenOK"; std::vector classNames = {"forwardbend", "jumpingjack", "kneebend", "pushups", "situps"}; static int classToInt(const std::string className) { diff --git a/workspace/pca/TrainPCA.h b/workspace/pca/TrainPCA.h index c3826ee..35bc31c 100644 --- a/workspace/pca/TrainPCA.h +++ b/workspace/pca/TrainPCA.h @@ -19,9 +19,43 @@ public: K::DynMatrix A3; }; + struct Settings { + int windowSize_ms = 1000; + int regionStart_ms = 1400; + float regionPercent = 0.85; + int stepSize_ms = 50; + }; + + /** parse all available data files using the given settings */ + static std::vector getAllData(const Settings& s) { + + // get all training-data files (all for each class) + std::vector files = Data::getDataFiles(999999); + std::cout << "training files: " << files.size() << std::endl; + + // construct patterns for each file + std::vector patterns; + for (ClassifiedDataFile cdf : files) { + + // read all samples from the given input file + std::cout << cdf.fileName << std::endl; + std::vector> samples = Data::getSamples(cdf.fileName, s.windowSize_ms, s.regionStart_ms, s.regionPercent, s.stepSize_ms); + + // convert them into a classified pattern + for (const std::vector vec : samples) { + patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec)); + } + + } + + return patterns; + + } + + /* static std::vector getTestData() { - const int windowSize_ms = 1000; + const int windowSize_ms = 2000; const int regionStart_ms = 1500 + 25; // worst case: half-window-size offset const float regionPercent = 0.85; const int stepSize_ms = 50; @@ -37,7 +71,7 @@ public: std::vector> samples = Data::getSamples(cdf.fileName, windowSize_ms, regionStart_ms, regionPercent, stepSize_ms); for (const std::vector vec : samples) { - patterns.push_back(ClassifiedPattern(cdf.className, vec)); + patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec)); } } @@ -46,16 +80,16 @@ public: } - /** train PCA features */ + // train PCA features static std::vector getTrainData() { - const int windowSize_ms = 1000; + const int windowSize_ms = 2000; const int regionStart_ms = 1500; const float regionPercent = 0.4; const int stepSize_ms = 50; // get 5 data-files per class - std::vector files = Data::getDataFiles(30); + std::vector files = Data::getDataFiles(6); // get patterns for each class std::vector patterns; @@ -66,7 +100,7 @@ public: std::cout << "\tgot" << samples.size() << " samples, each " << samples[0].size() << " values" << std::endl; for (const std::vector vec : samples) { - patterns.push_back(ClassifiedPattern(cdf.className, vec)); + patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec)); } } @@ -74,28 +108,29 @@ public: return patterns; } + */ /** get the A1,A2,A3 matrices for the given training data */ static Matrices getMatrices(const std::vector& data, const int numFeatures) { K::LinearTransform::PCA pca; - K::LinearTransform::MaxInterClassDistance inter; - K::LinearTransform::MinIntraClassDistance intra; + //K::LinearTransform::MaxInterClassDistance inter; + //K::LinearTransform::MinIntraClassDistance intra; for (const ClassifiedPattern& pat : data) { pca.add(pat.pattern); - inter.add(pat.className, pat.pattern); - intra.add(pat.className, pat.pattern); + //inter.add(pat.className, pat.pattern); + //intra.add(pat.className, pat.pattern); } Matrices m; m.A1 = pca.getA(numFeatures); - m.A2 = inter.getA(numFeatures); - m.A3 = intra.getA(numFeatures); + //m.A2 = inter.getA(numFeatures); + //m.A3 = intra.getA(numFeatures); - std::cout << "A1: " << std::endl << m.A1 << std::endl; - std::cout << "A2: " << std::endl << m.A2 << std::endl; - std::cout << "A3: " << std::endl << m.A3 << std::endl; + //std::cout << "A1: " << std::endl << m.A1 << std::endl; + //std::cout << "A2: " << std::endl << m.A2 << std::endl; + //std::cout << "A3: " << std::endl << m.A3 << std::endl; return m;