144 lines
3.7 KiB
C++
144 lines
3.7 KiB
C++
#ifndef TRAINPCA_H
|
|
#define TRAINPCA_H
|
|
|
|
#include "Data.h"
|
|
#include "Settings.h"
|
|
#include <KLib/math/linearTransform/PCA.h>
|
|
|
|
class TrainPCA {
|
|
|
|
private:
|
|
|
|
|
|
|
|
public:
|
|
|
|
struct Matrices {
|
|
K::DynMatrix<float> A1;
|
|
K::DynMatrix<float> A2;
|
|
K::DynMatrix<float> A3;
|
|
};
|
|
|
|
struct Settings {
|
|
int windowSize_ms = 1000;
|
|
int regionStart_ms = 1400;
|
|
float regionPercent = 0.85;
|
|
int stepSize_ms = 50;
|
|
};
|
|
|
|
/** parse all available data files using the given settings */
|
|
static std::vector<ClassifiedPattern> getAllData(const Settings& s) {
|
|
|
|
// get all training-data files (all for each class)
|
|
std::vector<ClassifiedDataFile> files = Data::getDataFiles(999999);
|
|
std::cout << "training files: " << files.size() << std::endl;
|
|
|
|
// construct patterns for each file
|
|
std::vector<ClassifiedPattern> patterns;
|
|
for (ClassifiedDataFile cdf : files) {
|
|
|
|
// read all samples from the given input file
|
|
std::cout << cdf.fileName << std::endl;
|
|
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, s.windowSize_ms, s.regionStart_ms, s.regionPercent, s.stepSize_ms);
|
|
|
|
// convert them into a classified pattern
|
|
for (const std::vector<float> vec : samples) {
|
|
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
|
|
}
|
|
|
|
}
|
|
|
|
return patterns;
|
|
|
|
}
|
|
|
|
/*
|
|
static std::vector<ClassifiedPattern> getTestData() {
|
|
|
|
const int windowSize_ms = 2000;
|
|
const int regionStart_ms = 1500 + 25; // worst case: half-window-size offset
|
|
const float regionPercent = 0.85;
|
|
const int stepSize_ms = 50;
|
|
|
|
// get 10 data-files per class
|
|
std::vector<ClassifiedDataFile> files = Data::getDataFiles(30);
|
|
|
|
// get patterns for each class
|
|
std::vector<ClassifiedPattern> patterns;
|
|
for (ClassifiedDataFile cdf : files) {
|
|
|
|
std::cout << cdf.fileName << std::endl;
|
|
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, windowSize_ms, regionStart_ms, regionPercent, stepSize_ms);
|
|
|
|
for (const std::vector<float> vec : samples) {
|
|
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
|
|
}
|
|
|
|
}
|
|
|
|
return patterns;
|
|
|
|
}
|
|
|
|
// train PCA features
|
|
static std::vector<ClassifiedPattern> getTrainData() {
|
|
|
|
const int windowSize_ms = 2000;
|
|
const int regionStart_ms = 1500;
|
|
const float regionPercent = 0.4;
|
|
const int stepSize_ms = 50;
|
|
|
|
// get 5 data-files per class
|
|
std::vector<ClassifiedDataFile> files = Data::getDataFiles(6);
|
|
|
|
// get patterns for each class
|
|
std::vector<ClassifiedPattern> patterns;
|
|
for (ClassifiedDataFile cdf : files) {
|
|
|
|
std::cout << cdf.fileName << std::endl;
|
|
std::vector<std::vector<float>> samples = Data::getSamples(cdf.fileName, windowSize_ms, regionStart_ms, regionPercent, stepSize_ms);
|
|
std::cout << "\tgot" << samples.size() << " samples, each " << samples[0].size() << " values" << std::endl;
|
|
|
|
for (const std::vector<float> vec : samples) {
|
|
patterns.push_back(ClassifiedPattern(cdf.className, cdf.fileName, vec));
|
|
}
|
|
|
|
}
|
|
|
|
return patterns;
|
|
|
|
}
|
|
*/
|
|
|
|
/** get the A1,A2,A3 matrices for the given training data */
|
|
static Matrices getMatrices(const std::vector<ClassifiedPattern>& data, const int numFeatures) {
|
|
|
|
K::LinearTransform<float>::PCA pca;
|
|
//K::LinearTransform<float>::MaxInterClassDistance<std::string> inter;
|
|
//K::LinearTransform<float>::MinIntraClassDistance<std::string> intra;
|
|
|
|
for (const ClassifiedPattern& pat : data) {
|
|
pca.add(pat.pattern);
|
|
//inter.add(pat.className, pat.pattern);
|
|
//intra.add(pat.className, pat.pattern);
|
|
}
|
|
|
|
Matrices m;
|
|
m.A1 = pca.getA(numFeatures);
|
|
//m.A2 = inter.getA(numFeatures);
|
|
//m.A3 = intra.getA(numFeatures);
|
|
|
|
//std::cout << "A1: " << std::endl << m.A1 << std::endl;
|
|
//std::cout << "A2: " << std::endl << m.A2 << std::endl;
|
|
//std::cout << "A3: " << std::endl << m.A3 << std::endl;
|
|
|
|
return m;
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
#endif // TRAINPCA_H
|
|
|