added octave files for preprocessing: filtering, windowing and additional signal calculation (3x PCA, 3x Magnitude)

2016-01-06 23:11:38 +01:00
parent 7ce2718306
commit 253df9c777
4 changed files with 442 additions and 0 deletions
--- a/toni/octave/functions.m
+++ b/toni/octave/functions.m
@@ -0,0 +1,169 @@
+display("functions")
+
+function files = getDataFiles(clsName, trainsetPerClass)
+  
+  setDataDir = "/home/toni/Documents/handygames/HandyGames/daten/";
+  
+  dir = strcat(setDataDir, clsName, "/");
+  lst = readdir(dir);
+  files = {};
+  cnt = 0;
+  for i = 1:numel(lst)
+    filename = lst{i};
+    if (regexp(filename, ".m$"))         # use only files ending with .m
+      file = strcat(dir, filename);
+      files = [files file];
+      ++cnt;
+      if (cnt >= trainsetPerClass)           # use only x input files
+        break;
+      endif
+    endif
+  end  
+  
+end
+
+function samples = getSamplesForClass(clsName, trainsetPerClass, start, percent)
+  files = getDataFiles(clsName, trainsetPerClass); #load Data files - thanks franke
+  samples = {};
+  start = start / 10; #start is given in ms and our raw input data has an fixed update-intervall of 10ms
+  for i = 1 : numel(files)
+    source(files{i});
+    lengthSamples = length(Magnet); # length/number of all available samples
+    pEnd = lengthSamples * percent; # how many samples do we want?
+    raw = {Gyro(start:pEnd,:), Accel(start:pEnd,:), Magnet(start:pEnd,:)}; # create cell array of the wanted samples
+    samples{i}.raw = raw; # write them into cell array for each Class
+  end
+  display(strcat("training data for '", clsName, "': ", num2str(length(samples)), " samples"));
+end
+
+function data = getRawTrainData(trainsetPerClass)
+  data = {};
+  data{1}.samples = getSamplesForClass("forwardbend", trainsetPerClass, 10, 0.9);
+  data{2}.samples = getSamplesForClass("kneebend", trainsetPerClass, 10, 0.9);
+  data{3}.samples = getSamplesForClass("pushups", trainsetPerClass, 10, 0.9);
+  data{4}.samples = getSamplesForClass("situps", trainsetPerClass, 10, 0.9);
+  data{5}.samples = getSamplesForClass("jumpingjack", trainsetPerClass, 10, 0.9);
+end
+
+function plotData(data,outPath)
+  
+  for k = 1:numel(data)
+    for j = 1:numel(data{k}.samples)
+      for i = 1:numel(data{k}.samples{j}.raw)
+        mat = data{k}.samples{j}.raw{i};
+        #subplot(6, 3, idx);
+        plot(mat);
+        legend("x","y","z");
+        print( gcf, '-dpng', fullfile(outPath, sprintf('img_%d_%d_%d.png', k, j, i) ) ); 
+      end
+    end
+  end
+end
+
+function filteredData = filterData(data)
+  pkg load signal;
+  filteredData = {};
+  for k = 1:numel(data)
+    for j = 1:numel(data{k}.samples)
+      for i = 1:numel(data{k}.samples{j}.raw)
+        mat = data{k}.samples{j}.raw{i};
+        [b, a] = butter(16, 0.2);
+        filteredData{k}.samples{j}.raw{i} = filtfilt(b, a, mat); #filtfilt extends the signal at beginning so we have no phaseshift
+      end
+    end
+  end        
+end
+
+function win = window(rawVec, posMS)
+  pos = posMS / 10;
+  #check if out of bounce, if yes- fill with zeros
+  #the last windows are sometimes filled with a lot of zeros... this could cause problems
+  if length(rawVec) <= pos+100-1
+    #win = rawVec(pos-100:length(rawVec),:);
+    #fillOut = zeros((pos+100-1) - length(rawVec), 3);
+    #length(fillOut)
+    #win = [win; fillOut];
+    win = [];
+  else  
+    win = rawVec(pos-100:pos+100-1,:); #100*10 ms is half the window size. that is 2s for each window. 
+  endif
+end
+
+function out = getMagnitude(data)
+
+  out = [];
+  for i = 1:length(data) 
+    tmp = sqrt(data(i,1)^2 + data(i,2)^2 + data(i,3)^2);
+    out = [out; tmp];
+  end
+
+end
+
+function windowedData = windowData(data)
+windowedData = {};
+
+  for k = 1:numel(data)
+    for j = 1:numel(data{k}.samples)
+    
+      winsAccel = {};
+      winsGyro = {};
+      winsMagnet = {};
+      
+      winsAccelPCA = {};
+      winsGyroPCA = {};
+      winsMagnetPCA = {};
+      
+      winsAccelMG = {};
+      winsGyroMG = {};
+      winsMagnetMG = {};
+      
+      winEnd = length(data{k}.samples{j}.raw{1}) * 10; # *10ms 
+      
+      for i = 1010:200:winEnd-1010 #200ms steps for sliding/overlapping windows
+        
+        #accel
+        winAccel = window(data{k}.samples{j}.raw{1}, i);
+        winsAccel = [winsAccel winAccel];
+        
+        [coeff1, score1] = princomp(winAccel);
+        winsAccelPCA = [winsAccelPCA score1(:,1)];  #choose the first axis (eigvec with the biggest eigvalue)
+        
+        winAccelMG = getMagnitude(winAccel);
+        winsAccelMG = [winsAccelMG winAccelMG];
+        
+        #gyro
+        winGyro = window(data{k}.samples{j}.raw{2}, i);
+        winsGyro = [winsGyro winGyro];
+        
+        [coeff2, score2] = princomp(winGyro);
+        winsGyroPCA = [winsGyroPCA score2(:,1)];
+        
+        winGyroMG = getMagnitude(winGyro);
+        winsGyroMG = [winsGyroMG winGyroMG];
+        
+        #magnet
+        winMagnet = window(data{k}.samples{j}.raw{3}, i);
+        winsMagnet = [winsMagnet winMagnet];
+        
+        [coeff3, score3] = princomp(winMagnet);
+        winsMagnetPCA = [winsMagnetPCA score3(:,1)];
+        
+        winMagnetMG = getMagnitude(winMagnet);
+        winsMagnetMG = [winsMagnetMG winMagnetMG];
+                
+      end
+      
+      #write back data
+      windowedData{k}.samples{j}.raw{1}.wins = winsAccel;
+      windowedData{k}.samples{j}.raw{2}.wins = winsGyro;
+      windowedData{k}.samples{j}.raw{3}.wins = winsMagnet; 
+      windowedData{k}.samples{j}.raw{4}.wins = winsAccelPCA;
+      windowedData{k}.samples{j}.raw{5}.wins = winsGyroPCA;
+      windowedData{k}.samples{j}.raw{6}.wins = winsMagnetPCA;          
+      windowedData{k}.samples{j}.raw{7}.wins = winsAccelMG;
+      windowedData{k}.samples{j}.raw{8}.wins = winsGyroMG;
+      windowedData{k}.samples{j}.raw{9}.wins = winsMagnetMG;         
+    end
+  end  
+end
+
--- a/toni/octave/princomp.m
+++ b/toni/octave/princomp.m
@@ -0,0 +1,176 @@
+## Copyright (C) 2013 Fernando Damian Nieuwveldt <fdnieuwveldt@gmail.com>
+##
+## This program is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License
+## as published by the Free Software Foundation; either version 3
+## of the License, or (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{COEFF}]} = princomp(@var{X})
+## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE}]} = princomp(@var{X})
+## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent}]} = princomp(@var{X})
+## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent},@var{tsquare}]} = princomp(@var{X})
+## @deftypefnx {Function File} {[...]} = princomp(@var{X},'econ')
+## @itemize @bullet
+## @item
+## princomp performs principal component analysis on a NxP data matrix X
+## @item
+## @var{COEFF} : returns the principal component coefficients
+## @item
+## @var{SCORE} : returns the principal component scores, the representation of X 
+## in the principal component space
+## @item
+## @var{LATENT} : returns the principal component variances, i.e., the 
+## eigenvalues of the covariance matrix X.
+## @item
+## @var{TSQUARE} : returns Hotelling's T-squared Statistic for each observation in X 
+## @item
+## [...] = princomp(X,'econ') returns only the elements of latent that are not 
+## necessarily zero, and the corresponding columns of COEFF and SCORE, that is, 
+## when n <= p, only the first n-1. This can be significantly faster when p is 
+## much larger than n. In this case the svd will be applied on the transpose of 
+## the data matrix X
+##
+## @end itemize
+##
+## @subheading References
+##
+## @enumerate
+## @item
+## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
+## 
+## @end enumerate
+## @end deftypefn
+
+function [COEFF,SCORE,latent,tsquare] = princomp(X,varargin)
+
+   if (nargin < 1 || nargin > 2)
+      print_usage ();
+   endif
+	
+   if (nargin == 2 && ! strcmpi (varargin{:}, "econ"))
+      error ("princomp: if a second input argument is present, it must be the string  'econ'");
+   endif
+
+   [nobs nvars] = size(X);
+	 
+   # Center the columns to mean zero
+   Xcentered = bsxfun(@minus,X,mean(X));
+
+   # Check if there are more variables then observations
+   if nvars <= nobs
+		
+      [U,S,COEFF] = svd(Xcentered, "econ");
+
+   else
+
+      # Calculate the svd on the transpose matrix, much faster
+      if (nargin == 2 && strcmpi ( varargin{:} , "econ"))
+	     [COEFF,S,V] = svd(Xcentered' , 'econ');
+      else
+	     [COEFF,S,V] = svd(Xcentered');
+      endif	
+
+   endif
+
+   if nargout > 1
+
+      # Get the Scores
+      SCORE = Xcentered*COEFF;
+	
+      # Get the rank of the SCORE matrix	
+      r = rank(SCORE); 
+
+      # Only use the first r columns, pad rest with zeros if economy != 'econ'
+      SCORE = SCORE(:,1:r) ; 
+	 
+      if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
+	    SCORE = [SCORE, zeros(nobs , nvars-r)];
+      else
+	    COEFF   = COEFF(: , 1:r);   
+      endif
+
+    endif
+
+    if nargout > 2
+
+      # This is the same as the eigenvalues of the covariance matrix of X
+      latent  = (diag(S'*S)/(size(Xcentered,1)-1))(1:r);
+
+      if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
+	  latent= [latent;zeros(nvars-r,1)];
+      endif
+    endif
+
+    if nargout > 3
+ 	# Calculate the Hotelling T-Square statistic for the observations
+	tsquare = sumsq(zscore(SCORE(:,1:r)),2);
+    endif
+
+endfunction
+
+%!shared COEFF,SCORE,latent,tsquare,m,x,R,V,lambda,i,S,F
+
+#NIST Engineering Statistics Handbook example (6.5.5.2)
+%!test
+%! x=[7 4 3
+%!    4 1 8
+%!    6 3 5
+%!    8 6 1
+%!    8 5 7
+%!    7 2 9
+%!    5 3 3
+%!    9 5 8
+%!    7 4 5
+%!    8 2 2];
+%! R = corrcoef (x);
+%! [V, lambda] = eig (R);
+%! [~, i] = sort(diag(lambda), "descend"); #arrange largest PC first
+%! S = V(:, i) * diag(sqrt(diag(lambda)(i)));
+%!assert(diag(S(:, 1:2)*S(:, 1:2)'), [0.8662; 0.8420; 0.9876], 1E-4); #contribution of first 2 PCs to each original variable
+%! B = V(:, i) * diag( 1./ sqrt(diag(lambda)(i)));
+%! F = zscore(x)*B;
+%! [COEFF,SCORE,latent,tsquare] = princomp(zscore(x, 1));
+%!assert(tsquare,sumsq(F, 2),1E4*eps);
+
+%!test
+%! x=[1,2,3;2,1,3]';
+%! [COEFF,SCORE,latent,tsquare] = princomp(x);
+%! m=[sqrt(2),sqrt(2);sqrt(2),-sqrt(2);-2*sqrt(2),0]/2;
+%! m(:,1) = m(:,1)*sign(COEFF(1,1));
+%! m(:,2) = m(:,2)*sign(COEFF(1,2));
+
+%!assert(COEFF,m(1:2,:),10*eps);
+%!assert(SCORE,-m,10*eps);
+%!assert(latent,[1.5;.5],10*eps);
+%!assert(tsquare,[2;2;2],10*eps);
+
+%!test
+%! x=x';
+%! [COEFF,SCORE,latent,tsquare] = princomp(x);
+%! m=[sqrt(2),sqrt(2),0;-sqrt(2),sqrt(2),0;0,0,2]/2;
+%! m(:,1) = m(:,1)*sign(COEFF(1,1));
+%! m(:,2) = m(:,2)*sign(COEFF(1,2));
+%! m(:,3) = m(:,3)*sign(COEFF(3,3));
+
+%!assert(COEFF,m,10*eps);
+%!assert(SCORE(:,1),-m(1:2,1),10*eps);
+%!assert(SCORE(:,2:3),zeros(2),10*eps);
+%!assert(latent,[1;0;0],10*eps);
+%!assert(tsquare,[1;1],10*eps)
+
+%!test
+%! [COEFF,SCORE,latent,tsquare] = princomp(x, "econ");
+
+%!assert(COEFF,m(:, 1),10*eps);
+%!assert(SCORE,-m(1:2,1),10*eps);
+%!assert(latent,[1],10*eps);
+%!assert(tsquare,[1;1],10*eps)
+
--- a/toni/octave/rawPlot.m
+++ b/toni/octave/rawPlot.m
@@ -0,0 +1,61 @@
+display("rawPlot")
+
+#load and plot raw data
+#{
+data structure of cellarray classes:
+classes[1 to 5]
+  samples[1 to trainsetPerClass]
+    raw[1 to 3] 
+      Accel[start:pEnd]
+      Gyro[start:pEnd]
+      Magnet[start:pEnd]
+      
+access the cells using classes{u}.samples{v}.raw{w}
+#}
+source("functions.m");
+
+trainsetPerClass = 6; #number of used trainsets for one class
+classes = {};
+classes = getRawTrainData(trainsetPerClass);
+
+#outPath = "/home/toni/Documents/handygames/HandyGames/toni/img/raw"
+#plotData(classes, outPath);
+
+#calc and plot filtered data
+filteredClasses = filterData(classes);
+
+#outPath = "/home/toni/Documents/handygames/HandyGames/toni/img/filtered";
+#plotData(filteredClasses, outPath);
+
+#create sliding windows and add 6 additional signals pca and magnitude
+#{
+data structure of windowedClasses:
+classes[1 to 5]
+  samples[1 to trainsetPerClass]
+    raw[1 to 9] <--- 9 different signals
+      WindowsAccel[2.5s at 200ms] 
+        Win, Win, Win, Win ... <--- single matrices
+      WindowsGyro[2.5s at 200ms]
+        Win, Win, Win, Win ... <--- single matrices
+      WindowsMagnet[2.5s at 200ms]
+        Win, Win, Win, Win ... <--- single matrices
+        
+    ---> add 6 additional sensors: pca and magnitude    
+      3x WindowsPCA (Accel, Gyro, Magnet)
+        Win, Win, Win, Win ... <--- single matrices
+      3x WindowsMagnitude (Accel, Gyro, Magnet)
+        Win, Win, Win, Win ... <--- single matrices  
+        
+access the cells using classes{u}.samples{v}.raw{w}.wins{}
+
+pca uses the eigenvector with the heighest eigenvalue as axis and projects the signals onto it for each sensor. 
+magnitude is calculated using sqrt(x^2 + y^2 + z^2) for each sensor. 
+#}
+windowedClasses = windowData(filteredClasses);
+
+#calculated features for the 5 signales (x, y, z, MG, PCA) of a sensor
+windowedFeatures = featureCalculation(windowedClasses);
+
+#train svm
+
+#run svm
--- a/toni/octave/todo.txt
+++ b/toni/octave/todo.txt
@@ -0,0 +1,36 @@
+daten mal plotten
+
+vorverarbeitung:
+low-pass-filter (-60dB at 20hZ)
+windowing (5s sliding at 200ms)
+
+segmentation (erstmal weglassen, da daten dafür nicht trainiert. bräuchten trainingsdaten von non-exercise die dann gelabeled sind)
+laufen ist z.b. keine übung! (man läuft ja zwischen übungen durch die gegend)
+
+feature computation:
+
+1) nehme 5 signale 
+x, y, z von acc und gyro
+magnitude sqrt(x^2+y^2+z^2) von acc und gyro
+pca (erste pca) (projektion der x,y,z koordinaten auf den ersten eigenvektor)
+
+(das sind jetzt viele achsen mit der annahme, das jeder mensch die Uhr an der gleichen Stelle trägt. vielleicht wäre es besser hier nur die "bekannten" achsen im raum zu nutzen, also x-achse vom acc?! vgl. recofit)
+
+2) generiere features für die jeweiligen fenster aus den signalen
+autocorrelation features: 
+autocorrelation bins
+
+energy features:
+RMS
+power spectrum bin mangnitudes
+
+statistical features:
+mean
+standard deviation
+kurtosis
+interquartile range
+
+9 (signales) * 7 (num features per window) * 85 (windows) features
+
+classification:
+SVM