177 lines
5.3 KiB
Matlab
177 lines
5.3 KiB
Matlab
## Copyright (C) 2013 Fernando Damian Nieuwveldt <fdnieuwveldt@gmail.com>
|
|
##
|
|
## This program is free software; you can redistribute it and/or
|
|
## modify it under the terms of the GNU General Public License
|
|
## as published by the Free Software Foundation; either version 3
|
|
## of the License, or (at your option) any later version.
|
|
##
|
|
## This program is distributed in the hope that it will be useful,
|
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
## GNU General Public License for more details.
|
|
##
|
|
## You should have received a copy of the GNU General Public License along with
|
|
## this program; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
## -*- texinfo -*-
|
|
## @deftypefn {Function File} {[@var{COEFF}]} = princomp(@var{X})
|
|
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE}]} = princomp(@var{X})
|
|
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent}]} = princomp(@var{X})
|
|
## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent},@var{tsquare}]} = princomp(@var{X})
|
|
## @deftypefnx {Function File} {[...]} = princomp(@var{X},'econ')
|
|
## @itemize @bullet
|
|
## @item
|
|
## princomp performs principal component analysis on a NxP data matrix X
|
|
## @item
|
|
## @var{COEFF} : returns the principal component coefficients
|
|
## @item
|
|
## @var{SCORE} : returns the principal component scores, the representation of X
|
|
## in the principal component space
|
|
## @item
|
|
## @var{LATENT} : returns the principal component variances, i.e., the
|
|
## eigenvalues of the covariance matrix X.
|
|
## @item
|
|
## @var{TSQUARE} : returns Hotelling's T-squared Statistic for each observation in X
|
|
## @item
|
|
## [...] = princomp(X,'econ') returns only the elements of latent that are not
|
|
## necessarily zero, and the corresponding columns of COEFF and SCORE, that is,
|
|
## when n <= p, only the first n-1. This can be significantly faster when p is
|
|
## much larger than n. In this case the svd will be applied on the transpose of
|
|
## the data matrix X
|
|
##
|
|
## @end itemize
|
|
##
|
|
## @subheading References
|
|
##
|
|
## @enumerate
|
|
## @item
|
|
## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
|
|
##
|
|
## @end enumerate
|
|
## @end deftypefn
|
|
|
|
function [COEFF,SCORE,latent,tsquare] = princomp(X,varargin)
|
|
|
|
if (nargin < 1 || nargin > 2)
|
|
print_usage ();
|
|
endif
|
|
|
|
if (nargin == 2 && ! strcmpi (varargin{:}, "econ"))
|
|
error ("princomp: if a second input argument is present, it must be the string 'econ'");
|
|
endif
|
|
|
|
[nobs nvars] = size(X);
|
|
|
|
# Center the columns to mean zero
|
|
Xcentered = bsxfun(@minus,X,mean(X));
|
|
|
|
# Check if there are more variables then observations
|
|
if nvars <= nobs
|
|
|
|
[U,S,COEFF] = svd(Xcentered, "econ");
|
|
|
|
else
|
|
|
|
# Calculate the svd on the transpose matrix, much faster
|
|
if (nargin == 2 && strcmpi ( varargin{:} , "econ"))
|
|
[COEFF,S,V] = svd(Xcentered' , 'econ');
|
|
else
|
|
[COEFF,S,V] = svd(Xcentered');
|
|
endif
|
|
|
|
endif
|
|
|
|
if nargout > 1
|
|
|
|
# Get the Scores
|
|
SCORE = Xcentered*COEFF;
|
|
|
|
# Get the rank of the SCORE matrix
|
|
r = rank(SCORE);
|
|
|
|
# Only use the first r columns, pad rest with zeros if economy != 'econ'
|
|
SCORE = SCORE(:,1:r) ;
|
|
|
|
if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
|
|
SCORE = [SCORE, zeros(nobs , nvars-r)];
|
|
else
|
|
COEFF = COEFF(: , 1:r);
|
|
endif
|
|
|
|
endif
|
|
|
|
if nargout > 2
|
|
|
|
# This is the same as the eigenvalues of the covariance matrix of X
|
|
latent = (diag(S'*S)/(size(Xcentered,1)-1))(1:r);
|
|
|
|
if !(nargin == 2 && strcmpi ( varargin{:} , "econ"))
|
|
latent= [latent;zeros(nvars-r,1)];
|
|
endif
|
|
endif
|
|
|
|
if nargout > 3
|
|
# Calculate the Hotelling T-Square statistic for the observations
|
|
tsquare = sumsq(zscore(SCORE(:,1:r)),2);
|
|
endif
|
|
|
|
endfunction
|
|
|
|
%!shared COEFF,SCORE,latent,tsquare,m,x,R,V,lambda,i,S,F
|
|
|
|
#NIST Engineering Statistics Handbook example (6.5.5.2)
|
|
%!test
|
|
%! x=[7 4 3
|
|
%! 4 1 8
|
|
%! 6 3 5
|
|
%! 8 6 1
|
|
%! 8 5 7
|
|
%! 7 2 9
|
|
%! 5 3 3
|
|
%! 9 5 8
|
|
%! 7 4 5
|
|
%! 8 2 2];
|
|
%! R = corrcoef (x);
|
|
%! [V, lambda] = eig (R);
|
|
%! [~, i] = sort(diag(lambda), "descend"); #arrange largest PC first
|
|
%! S = V(:, i) * diag(sqrt(diag(lambda)(i)));
|
|
%!assert(diag(S(:, 1:2)*S(:, 1:2)'), [0.8662; 0.8420; 0.9876], 1E-4); #contribution of first 2 PCs to each original variable
|
|
%! B = V(:, i) * diag( 1./ sqrt(diag(lambda)(i)));
|
|
%! F = zscore(x)*B;
|
|
%! [COEFF,SCORE,latent,tsquare] = princomp(zscore(x, 1));
|
|
%!assert(tsquare,sumsq(F, 2),1E4*eps);
|
|
|
|
%!test
|
|
%! x=[1,2,3;2,1,3]';
|
|
%! [COEFF,SCORE,latent,tsquare] = princomp(x);
|
|
%! m=[sqrt(2),sqrt(2);sqrt(2),-sqrt(2);-2*sqrt(2),0]/2;
|
|
%! m(:,1) = m(:,1)*sign(COEFF(1,1));
|
|
%! m(:,2) = m(:,2)*sign(COEFF(1,2));
|
|
|
|
%!assert(COEFF,m(1:2,:),10*eps);
|
|
%!assert(SCORE,-m,10*eps);
|
|
%!assert(latent,[1.5;.5],10*eps);
|
|
%!assert(tsquare,[2;2;2],10*eps);
|
|
|
|
%!test
|
|
%! x=x';
|
|
%! [COEFF,SCORE,latent,tsquare] = princomp(x);
|
|
%! m=[sqrt(2),sqrt(2),0;-sqrt(2),sqrt(2),0;0,0,2]/2;
|
|
%! m(:,1) = m(:,1)*sign(COEFF(1,1));
|
|
%! m(:,2) = m(:,2)*sign(COEFF(1,2));
|
|
%! m(:,3) = m(:,3)*sign(COEFF(3,3));
|
|
|
|
%!assert(COEFF,m,10*eps);
|
|
%!assert(SCORE(:,1),-m(1:2,1),10*eps);
|
|
%!assert(SCORE(:,2:3),zeros(2),10*eps);
|
|
%!assert(latent,[1;0;0],10*eps);
|
|
%!assert(tsquare,[1;1],10*eps)
|
|
|
|
%!test
|
|
%! [COEFF,SCORE,latent,tsquare] = princomp(x, "econ");
|
|
|
|
%!assert(COEFF,m(:, 1),10*eps);
|
|
%!assert(SCORE,-m(1:2,1),10*eps);
|
|
%!assert(latent,[1],10*eps);
|
|
%!assert(tsquare,[1;1],10*eps)
|
|
|