winsor.m 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. function[y,varargout] = winsor(x,p)
  2. % WINSOR Winsorize a vector
  3. % INPUTS : x - n*1 data vector
  4. % p - 2*1 vector of cut-off percentiles (left, right)
  5. % OUTPUTS : y - winsorized x, n*1 vector
  6. % i - (optional) n*1 value-replaced-indicator vector
  7. % NOTES : Let p1 = prctile(x,p(1)), p2 = prctile(x,p(2)). (Note
  8. % that PRCTILE ignores NaN values). Then
  9. % if x(i) < p1, y(i) = min(x(j) | x(j) >= p1)
  10. % if x(i) > p2, y(i) = max(x(j) | x(j) <= p2)
  11. % EXAMPLE : x = rand(10,1), y = winsor(x,[10 90])
  12. % AUTHOR : Dimitri Shvorob, dimitri.shvorob@vanderbilt.edu, 4/15/07
  13. if ~isvector(x)
  14. error('Input argument "x" must be a vector')
  15. end
  16. if nargin < 2
  17. error('Input argument "p" is undefined')
  18. end
  19. if ~isvector(p)
  20. error('Input argument "p" must be a vector')
  21. end
  22. if length(p) ~= 2
  23. error('Input argument "p" must be a 2*1 vector')
  24. end
  25. if p(1) < 0 || p(1) > 100
  26. error('Left cut-off percentile is out of [0,100] range')
  27. end
  28. if p(2) < 0 || p(2) > 100
  29. error('Right cut-off percentile is out of [0,100] range')
  30. end
  31. if p(1) > p(2)
  32. error('Left cut-off percentile exceeds right cut-off percentile')
  33. end
  34. p = prctile(x,p);
  35. i1 = x < p(1); v1 = min(x(~i1));
  36. i2 = x > p(2); v2 = max(x(~i2));
  37. y = x;
  38. y(i1) = v1;
  39. y(i2) = v2;
  40. if nargout > 1
  41. varargout(1) = {i1 | i2};
  42. end