added octave files for preprocessing: filtering, windowing and additional signal calculation (3x PCA, 3x Magnitude)

This commit is contained in:
Toni
2016-01-06 23:11:38 +01:00
parent 7ce2718306
commit 253df9c777
4 changed files with 442 additions and 0 deletions

169
toni/octave/functions.m Normal file
View File

@@ -0,0 +1,169 @@
display("functions")
function files = getDataFiles(clsName, trainsetPerClass)
setDataDir = "/home/toni/Documents/handygames/HandyGames/daten/";
dir = strcat(setDataDir, clsName, "/");
lst = readdir(dir);
files = {};
cnt = 0;
for i = 1:numel(lst)
filename = lst{i};
if (regexp(filename, ".m$")) # use only files ending with .m
file = strcat(dir, filename);
files = [files file];
++cnt;
if (cnt >= trainsetPerClass) # use only x input files
break;
endif
endif
end
end
function samples = getSamplesForClass(clsName, trainsetPerClass, start, percent)
files = getDataFiles(clsName, trainsetPerClass); #load Data files - thanks franke
samples = {};
start = start / 10; #start is given in ms and our raw input data has an fixed update-intervall of 10ms
for i = 1 : numel(files)
source(files{i});
lengthSamples = length(Magnet); # length/number of all available samples
pEnd = lengthSamples * percent; # how many samples do we want?
raw = {Gyro(start:pEnd,:), Accel(start:pEnd,:), Magnet(start:pEnd,:)}; # create cell array of the wanted samples
samples{i}.raw = raw; # write them into cell array for each Class
end
display(strcat("training data for '", clsName, "': ", num2str(length(samples)), " samples"));
end
function data = getRawTrainData(trainsetPerClass)
data = {};
data{1}.samples = getSamplesForClass("forwardbend", trainsetPerClass, 10, 0.9);
data{2}.samples = getSamplesForClass("kneebend", trainsetPerClass, 10, 0.9);
data{3}.samples = getSamplesForClass("pushups", trainsetPerClass, 10, 0.9);
data{4}.samples = getSamplesForClass("situps", trainsetPerClass, 10, 0.9);
data{5}.samples = getSamplesForClass("jumpingjack", trainsetPerClass, 10, 0.9);
end
function plotData(data,outPath)
for k = 1:numel(data)
for j = 1:numel(data{k}.samples)
for i = 1:numel(data{k}.samples{j}.raw)
mat = data{k}.samples{j}.raw{i};
#subplot(6, 3, idx);
plot(mat);
legend("x","y","z");
print( gcf, '-dpng', fullfile(outPath, sprintf('img_%d_%d_%d.png', k, j, i) ) );
end
end
end
end
function filteredData = filterData(data)
pkg load signal;
filteredData = {};
for k = 1:numel(data)
for j = 1:numel(data{k}.samples)
for i = 1:numel(data{k}.samples{j}.raw)
mat = data{k}.samples{j}.raw{i};
[b, a] = butter(16, 0.2);
filteredData{k}.samples{j}.raw{i} = filtfilt(b, a, mat); #filtfilt extends the signal at beginning so we have no phaseshift
end
end
end
end
function win = window(rawVec, posMS)
pos = posMS / 10;
#check if out of bounce, if yes- fill with zeros
#the last windows are sometimes filled with a lot of zeros... this could cause problems
if length(rawVec) <= pos+100-1
#win = rawVec(pos-100:length(rawVec),:);
#fillOut = zeros((pos+100-1) - length(rawVec), 3);
#length(fillOut)
#win = [win; fillOut];
win = [];
else
win = rawVec(pos-100:pos+100-1,:); #100*10 ms is half the window size. that is 2s for each window.
endif
end
function out = getMagnitude(data)
out = [];
for i = 1:length(data)
tmp = sqrt(data(i,1)^2 + data(i,2)^2 + data(i,3)^2);
out = [out; tmp];
end
end
function windowedData = windowData(data)
windowedData = {};
for k = 1:numel(data)
for j = 1:numel(data{k}.samples)
winsAccel = {};
winsGyro = {};
winsMagnet = {};
winsAccelPCA = {};
winsGyroPCA = {};
winsMagnetPCA = {};
winsAccelMG = {};
winsGyroMG = {};
winsMagnetMG = {};
winEnd = length(data{k}.samples{j}.raw{1}) * 10; # *10ms
for i = 1010:200:winEnd-1010 #200ms steps for sliding/overlapping windows
#accel
winAccel = window(data{k}.samples{j}.raw{1}, i);
winsAccel = [winsAccel winAccel];
[coeff1, score1] = princomp(winAccel);
winsAccelPCA = [winsAccelPCA score1(:,1)]; #choose the first axis (eigvec with the biggest eigvalue)
winAccelMG = getMagnitude(winAccel);
winsAccelMG = [winsAccelMG winAccelMG];
#gyro
winGyro = window(data{k}.samples{j}.raw{2}, i);
winsGyro = [winsGyro winGyro];
[coeff2, score2] = princomp(winGyro);
winsGyroPCA = [winsGyroPCA score2(:,1)];
winGyroMG = getMagnitude(winGyro);
winsGyroMG = [winsGyroMG winGyroMG];
#magnet
winMagnet = window(data{k}.samples{j}.raw{3}, i);
winsMagnet = [winsMagnet winMagnet];
[coeff3, score3] = princomp(winMagnet);
winsMagnetPCA = [winsMagnetPCA score3(:,1)];
winMagnetMG = getMagnitude(winMagnet);
winsMagnetMG = [winsMagnetMG winMagnetMG];
end
#write back data
windowedData{k}.samples{j}.raw{1}.wins = winsAccel;
windowedData{k}.samples{j}.raw{2}.wins = winsGyro;
windowedData{k}.samples{j}.raw{3}.wins = winsMagnet;
windowedData{k}.samples{j}.raw{4}.wins = winsAccelPCA;
windowedData{k}.samples{j}.raw{5}.wins = winsGyroPCA;
windowedData{k}.samples{j}.raw{6}.wins = winsMagnetPCA;
windowedData{k}.samples{j}.raw{7}.wins = winsAccelMG;
windowedData{k}.samples{j}.raw{8}.wins = winsGyroMG;
windowedData{k}.samples{j}.raw{9}.wins = winsMagnetMG;
end
end
end

176
toni/octave/princomp.m Normal file
View File

@@ -0,0 +1,176 @@
## Copyright (C) 2013 Fernando Damian Nieuwveldt <fdnieuwveldt@gmail.com>
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 3
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{COEFF}]} = princomp(@var{X})
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE}]} = princomp(@var{X})
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent}]} = princomp(@var{X})
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent},@var{tsquare}]} = princomp(@var{X})
## @deftypefnx {Function File} {[...]} = princomp(@var{X},'econ')
## @itemize @bullet
## @item
## princomp performs principal component analysis on a NxP data matrix X
## @item
## @var{COEFF} : returns the principal component coefficients
## @item
## @var{SCORE} : returns the principal component scores, the representation of X
## in the principal component space
## @item
## @var{LATENT} : returns the principal component variances, i.e., the
## eigenvalues of the covariance matrix X.
## @item
## @var{TSQUARE} : returns Hotelling's T-squared Statistic for each observation in X
## @item
## [...] = princomp(X,'econ') returns only the elements of latent that are not
## necessarily zero, and the corresponding columns of COEFF and SCORE, that is,
## when n <= p, only the first n-1. This can be significantly faster when p is
## much larger than n. In this case the svd will be applied on the transpose of
## the data matrix X
##
## @end itemize
##
## @subheading References
##
## @enumerate
## @item
## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
##
## @end enumerate
## @end deftypefn
function [COEFF,SCORE,latent,tsquare] = princomp(X,varargin)
if (nargin < 1 || nargin > 2)
print_usage ();
endif
if (nargin == 2 && ! strcmpi (varargin{:}, "econ"))
error ("princomp: if a second input argument is present, it must be the string 'econ'");
endif
[nobs nvars] = size(X);
# Center the columns to mean zero
Xcentered = bsxfun(@minus,X,mean(X));
# Check if there are more variables then observations
if nvars <= nobs
[U,S,COEFF] = svd(Xcentered, "econ");
else
# Calculate the svd on the transpose matrix, much faster
if (nargin == 2 && strcmpi ( varargin{:} , "econ"))
[COEFF,S,V] = svd(Xcentered' , 'econ');
else
[COEFF,S,V] = svd(Xcentered');
endif
endif
if nargout > 1
# Get the Scores
SCORE = Xcentered*COEFF;
# Get the rank of the SCORE matrix
r = rank(SCORE);
# Only use the first r columns, pad rest with zeros if economy != 'econ'
SCORE = SCORE(:,1:r) ;
if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
SCORE = [SCORE, zeros(nobs , nvars-r)];
else
COEFF = COEFF(: , 1:r);
endif
endif
if nargout > 2
# This is the same as the eigenvalues of the covariance matrix of X
latent = (diag(S'*S)/(size(Xcentered,1)-1))(1:r);
if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
latent= [latent;zeros(nvars-r,1)];
endif
endif
if nargout > 3
# Calculate the Hotelling T-Square statistic for the observations
tsquare = sumsq(zscore(SCORE(:,1:r)),2);
endif
endfunction
%!shared COEFF,SCORE,latent,tsquare,m,x,R,V,lambda,i,S,F
#NIST Engineering Statistics Handbook example (6.5.5.2)
%!test
%! x=[7 4 3
%! 4 1 8
%! 6 3 5
%! 8 6 1
%! 8 5 7
%! 7 2 9
%! 5 3 3
%! 9 5 8
%! 7 4 5
%! 8 2 2];
%! R = corrcoef (x);
%! [V, lambda] = eig (R);
%! [~, i] = sort(diag(lambda), "descend"); #arrange largest PC first
%! S = V(:, i) * diag(sqrt(diag(lambda)(i)));
%!assert(diag(S(:, 1:2)*S(:, 1:2)'), [0.8662; 0.8420; 0.9876], 1E-4); #contribution of first 2 PCs to each original variable
%! B = V(:, i) * diag( 1./ sqrt(diag(lambda)(i)));
%! F = zscore(x)*B;
%! [COEFF,SCORE,latent,tsquare] = princomp(zscore(x, 1));
%!assert(tsquare,sumsq(F, 2),1E4*eps);
%!test
%! x=[1,2,3;2,1,3]';
%! [COEFF,SCORE,latent,tsquare] = princomp(x);
%! m=[sqrt(2),sqrt(2);sqrt(2),-sqrt(2);-2*sqrt(2),0]/2;
%! m(:,1) = m(:,1)*sign(COEFF(1,1));
%! m(:,2) = m(:,2)*sign(COEFF(1,2));
%!assert(COEFF,m(1:2,:),10*eps);
%!assert(SCORE,-m,10*eps);
%!assert(latent,[1.5;.5],10*eps);
%!assert(tsquare,[2;2;2],10*eps);
%!test
%! x=x';
%! [COEFF,SCORE,latent,tsquare] = princomp(x);
%! m=[sqrt(2),sqrt(2),0;-sqrt(2),sqrt(2),0;0,0,2]/2;
%! m(:,1) = m(:,1)*sign(COEFF(1,1));
%! m(:,2) = m(:,2)*sign(COEFF(1,2));
%! m(:,3) = m(:,3)*sign(COEFF(3,3));
%!assert(COEFF,m,10*eps);
%!assert(SCORE(:,1),-m(1:2,1),10*eps);
%!assert(SCORE(:,2:3),zeros(2),10*eps);
%!assert(latent,[1;0;0],10*eps);
%!assert(tsquare,[1;1],10*eps)
%!test
%! [COEFF,SCORE,latent,tsquare] = princomp(x, "econ");
%!assert(COEFF,m(:, 1),10*eps);
%!assert(SCORE,-m(1:2,1),10*eps);
%!assert(latent,[1],10*eps);
%!assert(tsquare,[1;1],10*eps)

61
toni/octave/rawPlot.m Normal file
View File

@@ -0,0 +1,61 @@
display("rawPlot")
#load and plot raw data
#{
data structure of cellarray classes:
classes[1 to 5]
samples[1 to trainsetPerClass]
raw[1 to 3]
Accel[start:pEnd]
Gyro[start:pEnd]
Magnet[start:pEnd]
access the cells using classes{u}.samples{v}.raw{w}
#}
source("functions.m");
trainsetPerClass = 6; #number of used trainsets for one class
classes = {};
classes = getRawTrainData(trainsetPerClass);
#outPath = "/home/toni/Documents/handygames/HandyGames/toni/img/raw"
#plotData(classes, outPath);
#calc and plot filtered data
filteredClasses = filterData(classes);
#outPath = "/home/toni/Documents/handygames/HandyGames/toni/img/filtered";
#plotData(filteredClasses, outPath);
#create sliding windows and add 6 additional signals pca and magnitude
#{
data structure of windowedClasses:
classes[1 to 5]
samples[1 to trainsetPerClass]
raw[1 to 9] <--- 9 different signals
WindowsAccel[2.5s at 200ms]
Win, Win, Win, Win ... <--- single matrices
WindowsGyro[2.5s at 200ms]
Win, Win, Win, Win ... <--- single matrices
WindowsMagnet[2.5s at 200ms]
Win, Win, Win, Win ... <--- single matrices
---> add 6 additional sensors: pca and magnitude
3x WindowsPCA (Accel, Gyro, Magnet)
Win, Win, Win, Win ... <--- single matrices
3x WindowsMagnitude (Accel, Gyro, Magnet)
Win, Win, Win, Win ... <--- single matrices
access the cells using classes{u}.samples{v}.raw{w}.wins{}
pca uses the eigenvector with the heighest eigenvalue as axis and projects the signals onto it for each sensor.
magnitude is calculated using sqrt(x^2 + y^2 + z^2) for each sensor.
#}
windowedClasses = windowData(filteredClasses);
#calculated features for the 5 signales (x, y, z, MG, PCA) of a sensor
windowedFeatures = featureCalculation(windowedClasses);
#train svm
#run svm

36
toni/octave/todo.txt Normal file
View File

@@ -0,0 +1,36 @@
daten mal plotten
vorverarbeitung:
low-pass-filter (-60dB at 20hZ)
windowing (5s sliding at 200ms)
segmentation (erstmal weglassen, da daten dafür nicht trainiert. bräuchten trainingsdaten von non-exercise die dann gelabeled sind)
laufen ist z.b. keine übung! (man läuft ja zwischen übungen durch die gegend)
feature computation:
1) nehme 5 signale
x, y, z von acc und gyro
magnitude sqrt(x^2+y^2+z^2) von acc und gyro
pca (erste pca) (projektion der x,y,z koordinaten auf den ersten eigenvektor)
(das sind jetzt viele achsen mit der annahme, das jeder mensch die Uhr an der gleichen Stelle trägt. vielleicht wäre es besser hier nur die "bekannten" achsen im raum zu nutzen, also x-achse vom acc?! vgl. recofit)
2) generiere features für die jeweiligen fenster aus den signalen
autocorrelation features:
autocorrelation bins
energy features:
RMS
power spectrum bin mangnitudes
statistical features:
mean
standard deviation
kurtosis
interquartile range
9 (signales) * 7 (num features per window) * 85 (windows) features
classification:
SVM