added features. yes its late, dont judge me

This commit is contained in:
Toni
2016-01-09 03:57:48 +01:00
parent 253df9c777
commit 1af38ba3b5
4 changed files with 22782 additions and 3 deletions

22669
toni/octave/features.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -167,3 +167,80 @@ windowedData = {};
end
end
function features = featureCalculation(data)
  # FEATURECALCULATION  Build one feature row per window.
  #
  # Input:
  #   data      cell array; the index k of each entry is used as the class
  #             label. Every entry is read as
  #             data{k}.samples{j}.raw{i}.wins{m}, where each wins{m} is a
  #             matrix of which only the first column is used.
  #
  # Output:
  #   features  one row per window, laid out as
  #             [classLabel, 5 autocorrelation bin means, RMS, 10 band powers].
  #             A bin mean is NaN when no autocorrelation value falls into
  #             that bin (see getBinMean).

  numAcBins    = 5;    # number of autocorrelation value bins
  numPowerBins = 10;   # number of triangular power-band filters

  features = [];
  for k = 1:numel(data)
    for j = 1:numel(data{k}.samples)
      for i = 1:numel(data{k}.samples{j}.raw)
        for m = 1:numel(data{k}.samples{j}.raw{i}.wins)
          currentWindow = data{k}.samples{j}.raw{i}.wins{m};
          currentWindow = currentWindow(:,1);

          # Autocorrelation of the window, split into bins of equal width
          # over the range of the autocorrelation values (not equal counts);
          # the mean of every bin is one feature.
          autoCorr = xcorr(currentWindow);
          [~, binCenter] = hist(autoCorr, numAcBins);
          binSize = abs(binCenter(end-1) - binCenter(end));
          binEdges = linspace(binCenter(1) - (binSize/2), ...
                              binCenter(end) + (binSize/2), numAcBins + 1);
          [~, binIdx] = histc(autoCorr, binEdges);
          # histc reports values equal to the last edge in an extra bin
          # (index numAcBins + 1) and values outside the edges as 0. The
          # edges span exactly min..max of autoCorr, so the autocorrelation
          # peak would otherwise be dropped from the top bin, which could
          # leave that bin empty (NaN mean). Fold both extremes back in.
          binIdx(autoCorr >= binEdges(end)) = numAcBins;
          binIdx(autoCorr <  binEdges(1))   = 1;
          binMeans = getBinMean(autoCorr, binIdx, numAcBins);

          # Root-mean-square (RMS) of the signal. Not stored in a variable
          # called "rms" so that no function of that name is shadowed.
          rmsValue = sqrt(mean(currentWindow.^2));

          # Power bands from 0.5 to 25 Hz (useful if the windows are longer
          # than 4 s and window sizes are 256, 512, ...).
          # Original author's note: periodogram zero-pads the FFT.
          [powerBand, w] = periodogram(currentWindow);
          # numPowerBins + 2 logarithmically spaced edges give numPowerBins
          # overlapping triangles.
          powerEdges = logspace(log10(0.5), log10(25), numPowerBins + 2);
          # length(powerBand)*2 - 2 recovers nfft, assuming powerBand is a
          # one-sided spectrum of length nfft/2 + 1 -- TODO confirm.
          triFilter = getTriangularFunction(powerEdges, length(powerBand)*2 - 2);
          # Start from a fresh vector for every window so that no value can
          # leak over from the previous window.
          bandPower = zeros(1, numel(triFilter));
          for l = 1:numel(triFilter)
            filteredBand = triFilter{l} .* powerBand;
            bandPower(l) = sum(filteredBand);   # sum over frequency (no log, no DCT)
          end

          # Put everything together; the class label is the index into data.
          classLabel = k;
          features = [features; classLabel, binMeans, rmsValue, bandPower];
        end
      end
    end
  end
end
function value = getBinMean(data, idx, numBins)
  # GETBINMEAN  Mean of the data values assigned to each bin.
  #
  #   data     values to average
  #   idx      bin index of every element of data (same size as data)
  #   numBins  bins 1..numBins are evaluated
  #
  # Returns a row vector with one mean per bin. A bin without members
  # gets the mean of an empty selection.
  value = [];
  for bin = 1:numBins
    inBin = (idx == bin);
    value(bin) = mean(data(inBin));
  end
end
#triangular functions. (edges of the triangles; num fft values -> nfft.)
function triFilter = getTriangularFunction(edges, nfft, sampleRate)
  # GETTRIANGULARFUNCTION  Overlapping triangular filters over FFT bins.
  #
  #   edges       frequencies in Hz of the triangle edges, ascending.
  #               Triangle i spans edges(i) .. edges(i+2), so numel(edges) - 2
  #               filters are produced.
  #   nfft        FFT length; every filter has nfft/2 + 1 entries.
  #   sampleRate  optional sample rate in Hz, default 100.
  #
  # Returns a cell array of column vectors, zero outside the triangle.
  #
  # Raises an error when an edge maps outside 1 .. nfft/2 + 1 or when the
  # edges are not ascending.

  if nargin < 3 || isempty(sampleRate)
    sampleRate = 100;   # sample rate the original code was written for
  end

  numSpectrumBins = nfft/2 + 1;

  # Index of each edge within the spectrum; thanks to the FFT every sample
  # represents a frequency: idx * sampleRate / nfft = Hz of that idx.
  # NOTE(review): with that relation the one-based index would be
  # floor(nfft * f / sampleRate) + 1; the original mapping is kept so that
  # existing feature values do not change -- confirm which one is intended.
  edgesByIdx = floor((nfft + 1) * edges(:).' / sampleRate);

  # Index 0 cannot be addressed, and an index past the spectrum would
  # silently enlarge the filter so it no longer matches the spectrum length.
  if any(edgesByIdx < 1) || any(edgesByIdx > numSpectrumBins)
    error('getTriangularFunction: edges map to bins %d..%d, valid range is 1..%d', ...
          min(edgesByIdx), max(edgesByIdx), numSpectrumBins);
  end
  if any(diff(edgesByIdx) < 0)
    error('getTriangularFunction: edges must be in ascending order');
  end

  # Generate the triangle filters.
  triFilter = {};
  for i = 1:numel(edgesByIdx) - 2
    first = edgesByIdx(i);
    last  = edgesByIdx(i+2);
    tmp = zeros(numSpectrumBins, 1);
    tmp(first:last) = triang(last - first + 1);
    triFilter{i} = tmp;
  end
end

View File

@@ -54,7 +54,12 @@ magnitude is calculated using sqrt(x^2 + y^2 + z^2) for each sensor.
windowedClasses = windowData(filteredClasses);
# Calculate features for the 5 signals (x, y, z, MG, PCA) of a sensor.
# NOTE(review): featureCalculation is called twice on the same input below
# (once into windowedFeatures, once into features) -- confirm which result
# is meant to be used.
# NOTE(review): featureCalculation as shown only reads the first column of
# each window -- confirm that all 5 signals are really covered.
windowedFeatures = featureCalculation(windowedClasses);
#{
data structure of features
label | feature #1 | 2 | 3 ...
#}
features = featureCalculation(windowedClasses);
# Train the SVM.

View File

@@ -18,11 +18,11 @@ pca (erste pca) (projektion der x,y,z koordinaten auf den ersten eigenvektor)
2) generiere features für die jeweiligen fenster aus den signalen
autocorrelation features:
autocorrelation bins
autocorrelation bins. jedes signal window wird in 5 frequenz bins unterteilt und von diesen der mean berechnet. wäre es nicht besser, für einen sensor feste bins für alle klassen zu haben? braucht aber wohl mehr bins, sonst ist der bereich zu klein?
energy features:
RMS
power spectrum bin magnitudes
power spectrum (|fft|^2). Features über logarithmisch überlappende Vierecksfilter, da Dreiecksfilter hier keinen Sinn ergeben, oder? Wie bei Audio noch eine DCT auf die Ergebnisse des Dreiecksfilters, um unwichtige rauszuwerfen?
statistical features:
mean
@@ -34,3 +34,31 @@ interquartile range
classification:
SVM
Abschluss-Dokument:
- was geht
- was hat nicht funktioniert?
Training:
Leave-one-out.