123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312 |
- function [U,S,V] = pca(A,k,its,l)
- %PCA Low-rank approximation in SVD form.
- %
- %
- % [U,S,V] = PCA(A) constructs a nearly optimal rank-6 approximation
- % USV' to A, using 2 full iterations of a block Lanczos method
- % of block size 6+2=8, started with an n x 8 random matrix,
- % when A is m x n; the ref. below explains "nearly optimal."
- % The smallest dimension of A must be >= 6 when A is
- % the only input to PCA.
- %
- % [U,S,V] = PCA(A,k) constructs a nearly optimal rank-k approximation
- % USV' to A, using 2 full iterations of a block Lanczos method
- % of block size k+2, started with an n x (k+2) random matrix,
- % when A is m x n; the ref. below explains "nearly optimal."
- % k must be a positive integer <= the smallest dimension of A.
- %
- % [U,S,V] = PCA(A,k,its) constructs a nearly optimal rank-k approx. USV'
- % to A, using its full iterations of a block Lanczos method
- % of block size k+2, started with an n x (k+2) random matrix,
- % when A is m x n; the ref. below explains "nearly optimal."
- % k must be a positive integer <= the smallest dimension of A,
- % and its must be a nonnegative integer.
- %
- % [U,S,V] = PCA(A,k,its,l) constructs a nearly optimal rank-k approx.
- % USV' to A, using its full iterates of a block Lanczos method
- % of block size l, started with an n x l random matrix,
- % when A is m x n; the ref. below explains "nearly optimal."
- % k must be a positive integer <= the smallest dimension of A,
- % its must be a nonnegative integer,
- % and l must be a positive integer >= k.
- %
- %
- % The low-rank approximation USV' is in the form of an SVD in the sense
- % that the columns of U are orthonormal, as are the columns of V,
- % the entries of S are all nonnegative, and the only nonzero entries
- % of S appear in non-increasing order on its diagonal.
- % U is m x k, V is n x k, and S is k x k, when A is m x n.
- %
- % Increasing its or l improves the accuracy of the approximation USV'
- % to A; the ref. below describes how the accuracy depends on its and l.
- %
- %
- % Note: PCA invokes RAND. To obtain repeatable results,
- % invoke RAND('seed',j) with a fixed integer j before invoking PCA.
- %
- % Note: PCA currently requires the user to center and normalize the rows
- % or columns of the input matrix A before invoking PCA (if such
- % is desired).
- %
- % Note: Hoping to improve its accuracy, PCA takes the adjoint
- % of the input matrix A if A has more columns than rows. However,
- % the code will produce a correct result without taking an adjoint.
- % If taking the adjoint turns out to be costly in a particular
- % application, then the user might consider deleting the lines
- % below that take the adjoint (assuming its = 0).
- %
- % Note: The user may ascertain the accuracy of the approximation USV'
- % to A by invoking DIFFSNORM(A,U,S,V).
- %
- %
- % inputs (the first is required):
- % A -- matrix being approximated
- % k -- rank of the approximation being constructed;
- % k must be a positive integer <= the smallest dimension of A,
- % and defaults to 6
- % its -- number of full iterations of a block Lanczos method to conduct;
- % its must be a nonnegative integer, and defaults to 2
- % l -- block size of the block Lanczos iterations;
- % l must be a positive integer >= k, and defaults to k+2
- %
- % outputs (all three are required):
- % U -- m x k matrix in the rank-k approximation USV' to A,
- % where A is m x n; the columns of U are orthonormal
- % S -- k x k matrix in the rank-k approximation USV' to A,
- % where A is m x n; the entries of S are all nonnegative,
- % and its only nonzero entries appear in nonincreasing order
- % on the diagonal
- % V -- n x k matrix in the rank-k approximation USV' to A,
- % where A is m x n; the columns of V are orthonormal
- %
- %
- % Example:
- % A = rand(1000,2)*rand(2,1000);
- % A = A/normest(A);
- % [U,S,V] = pca(A,2,0);
- % diffsnorm(A,U,S,V)
- %
- % This code snippet produces a rank-2 approximation USV' to A such that
- % the columns of U are orthonormal, as are the columns of V, and
- % the entries of S are all nonnegative and are zero off the diagonal.
- % diffsnorm(A,U,S,V) outputs an estimate of the spectral norm
- % of A-USV', which should be close to the machine precision.
- %
- %
- % Reference:
- % Vladimir Rokhlin, Arthur Szlam, and Mark Tygert,
- % A randomized algorithm for principal component analysis,
- % arXiv:0809.2274v1 [stat.CO], 2008 (available at http://arxiv.org).
- %
- %
- % See also PCACOV, PRINCOMP, SVDS.
- %
- % Copyright 2008 Mark Tygert.
- %
- % Check the number of inputs.
- %
- if(nargin < 1)
- error('MATLAB:pca:TooFewIn',...
- 'There must be at least 1 input.')
- end
- if(nargin > 4)
- error('MATLAB:pca:TooManyIn',...
- 'There must be at most 4 inputs.')
- end
- %
- % Check the number of outputs.
- %
- if(nargout ~= 3)
- error('MATLAB:pca:WrongNumOut',...
- 'There must be exactly 3 outputs.')
- end
- %
- % Set the inputs k, its, and l to default values, if necessary.
- %
- if(nargin == 1)
- k = 6;
- its = 2;
- l = k+2;
- end
- if(nargin == 2)
- its = 2;
- l = k+2;
- end
- if(nargin == 3)
- l = k+2;
- end
- %
- % Check the first input argument.
- %
- if(~isfloat(A))
- error('MATLAB:pca:In1NotFloat',...
- 'Input 1 must be a floating-point matrix.')
- end
- if(isempty(A))
- error('MATLAB:pca:In1Empty',...
- 'Input 1 must not be empty.')
- end
- %
- % Retrieve the dimensions of A.
- %
- [m n] = size(A);
- %
- % Check the remaining input arguments.
- %
- if(size(k,1) ~= 1 || size(k,2) ~= 1)
- error('MATLAB:pca:In2Not1x1',...
- 'Input 2 must be a scalar.')
- end
- if(size(its,1) ~= 1 || size(its,2) ~= 1)
- error('MATLAB:pca:In3Not1x1',...
- 'Input 3 must be a scalar.')
- end
- if(size(l,1) ~= 1 || size(l,2) ~= 1)
- error('MATLAB:pca:In4Not1x1',...
- 'Input 4 must be a scalar.')
- end
- if(k <= 0)
- error('MATLAB:pca:In2NonPos',...
- 'Input 2 must be > 0.')
- end
- if((k > m) || (k > n))
- error('MATLAB:pca:In2TooBig',...
- 'Input 2 must be <= the smallest dimension of Input 1.')
- end
- if(its < 0)
- error('MATLAB:pca:In3Neg',...
- 'Input 3 must be >= 0.')
- end
- if(l < k)
- error('MATLAB:pca:In4ltIn2',...
- 'Input 4 must be >= Input 2.')
- end
- %
- % SVD A directly if (its+1)*l >= m/1.25 or (its+1)*l >= n/1.25.
- %
- if(((its+1)*l >= m/1.25) || ((its+1)*l >= n/1.25))
- if(~issparse(A))
- [U,S,V] = svd(A,'econ');
- end
- if(issparse(A))
- [U,S,V] = svd(full(A),'econ');
- end
- %
- % Retain only the leftmost k columns of U, the leftmost k columns of V,
- % and the uppermost leftmost k x k block of S.
- %
- U = U(:,1:k);
- V = V(:,1:k);
- S = S(1:k,1:k);
- return
- end
- %
- % Take the adjoint of the matrix if m < n.
- %
- flag = 0;
- if(m < n)
- A = A';
- [m n] = size(A);
- flag = 1;
- end
- %
- % Form the adjoint Aa of A.
- %
- if(its > 0)
- Aa = A';
- end
- %
- % Apply A to a random matrix, obtaining H.
- %
- rand('seed',rand('seed'));
- if(isreal(A))
- H = A*(2*rand(n,l)-ones(n,l));
- end
- if(~isreal(A))
- H = A*( (2*rand(n,l)-ones(n,l)) + i*(2*rand(n,l)-ones(n,l)) );
- end
- rand('twister',rand('twister'));
- %
- % Initialize F to its final size and fill its leftmost block with H.
- %
- F = zeros(m,(its+1)*l);
- F(1:m, 1:l) = H;
- %
- % Apply A*A' to H a total of its times,
- % augmenting F with the new H each time.
- %
- for it = 1:its
- H = Aa*H;
- H = A*H;
- F(1:m, (1+it*l):((it+1)*l)) = H;
- end
- clear Aa H;
- %
- % Form a matrix Q whose columns constitute an orthonormal basis
- % for the columns of F.
- %
- [Q,R,E] = qr(F,0);
- clear F R E;
- %
- % SVD Q'*A to obtain approximations to the singular values
- % and right singular vectors of A; adjust the left singular vectors
- % of Q'*A to approximate the left singular vectors of A.
- %
- [U2,S,V] = svd(Q'*A,'econ');
- U = Q*U2;
- clear Q U2;
- %
- % Retain only the leftmost k columns of U, the leftmost k columns of V,
- % and the uppermost leftmost k x k block of S.
- %
- U = U(:,1:k);
- V = V(:,1:k);
- S = S(1:k,1:k);
- %
- % If flag is nonzero, then swap U and V.
- %
- if(flag ~= 0)
- flag = U;
- U = V;
- V = flag;
- end
- clear flag;
|