Files
HandyGames/workspace/pca/TrainPCA.h
2016-01-09 18:34:03 +01:00

144 lines
3.7 KiB
C++

#ifndef TRAINPCA_H
#define TRAINPCA_H
#include "Data.h"
#include "Settings.h"
#include <KLib/math/linearTransform/PCA.h>
class TrainPCA {
private:
public:
struct Matrices {
K::DynMatrix<float> A1;
K::DynMatrix<float> A2;
K::DynMatrix<float> A3;
};
struct Settings {
int windowSize_ms = 1000;
int regionStart_ms = 1400;
float regionPercent = 0.85;
int stepSize_ms = 50;
};
/** parse all available data files using the given settings */
static std::vector<ClassifiedPattern> getAllData(const Settings& s) {
// get all training-data files (all for each class)
std::vector<ClassifiedDataFile> files = Data::getDataFiles(999999);
std::cout << "training files: " << files.size() << std::endl;
// construct patterns for each file
std::vector<ClassifiedPattern> patterns;
for (ClassifiedDataFile cdf : files) {
// read all samples from the given input file
std::cout << cdf.fileName << std::endl;
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, s.windowSize_ms, s.regionStart_ms, s.regionPercent, s.stepSize_ms);
// convert them into a classified pattern
for (const std::vector<float> vec : samples) {
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
}
}
return patterns;
}
/*
static std::vector<ClassifiedPattern> getTestData() {
const int windowSize_ms = 2000;
const int regionStart_ms = 1500 + 25; // worst case: half-window-size offset
const float regionPercent = 0.85;
const int stepSize_ms = 50;
// get 10 data-files per class
std::vector<ClassifiedDataFile> files = Data::getDataFiles(30);
// get patterns for each class
std::vector<ClassifiedPattern> patterns;
for (ClassifiedDataFile cdf : files) {
std::cout << cdf.fileName << std::endl;
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, windowSize_ms, regionStart_ms, regionPercent, stepSize_ms);
for (const std::vector<float> vec : samples) {
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
}
}
return patterns;
}
// train PCA features
static std::vector<ClassifiedPattern> getTrainData() {
const int windowSize_ms = 2000;
const int regionStart_ms = 1500;
const float regionPercent = 0.4;
const int stepSize_ms = 50;
// get 5 data-files per class
std::vector<ClassifiedDataFile> files = Data::getDataFiles(6);
// get patterns for each class
std::vector<ClassifiedPattern> patterns;
for (ClassifiedDataFile cdf : files) {
std::cout << cdf.fileName << std::endl;
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, windowSize_ms, regionStart_ms, regionPercent, stepSize_ms);
std::cout << "\tgot" << samples.size() << " samples, each " << samples[0].size() << " values" << std::endl;
for (const std::vector<float> vec : samples) {
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
}
}
return patterns;
}
*/
/** get the A1,A2,A3 matrices for the given training data */
static Matrices getMatrices(const std::vector<ClassifiedPattern>& data, const int numFeatures) {
K::LinearTransform<float>::PCA pca;
//K::LinearTransform<float>::MaxInterClassDistance<std::string> inter;
//K::LinearTransform<float>::MinIntraClassDistance<std::string> intra;
for (const ClassifiedPattern& pat : data) {
pca.add(pat.pattern);
//inter.add(pat.className, pat.pattern);
//intra.add(pat.className, pat.pattern);
}
Matrices m;
m.A1 = pca.getA(numFeatures);
//m.A2 = inter.getA(numFeatures);
//m.A3 = intra.getA(numFeatures);
//std::cout << "A1: " << std::endl << m.A1 << std::endl;
//std::cout << "A2: " << std::endl << m.A2 << std::endl;
//std::cout << "A3: " << std::endl << m.A3 << std::endl;
return m;
}
};
#endif // TRAINPCA_H