testboxplot.m 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  function [Q1, Q3, outlier_num] = testboxplot(x, k, tukey, bplot)
  %TESTBOXPLOT Flag potential outliers using the boxplot (quartile fence) rule.
  %
  % [Q1, Q3, outlier_num] = testboxplot(x, k, tukey, bplot) marks as a
  % potential outlier every value of 'x' that lies more than 'k' times the
  % interquartile range (Q3-Q1) below Q1 or above Q3. A boxplot drawn with
  % the standard 'boxplot' function is also produced by default.
  %
  % Inputs:
  %   x     - numeric array. Columns are the time series and rows are the
  %           time intervals. All series contain the same number of
  %           observations.
  %   k     - multiplier of the interquartile range (Q3-Q1) that sets the
  %           distance of the fences from the quartiles. Default: 2.
  %   tukey - 0 (default) computes the quartiles with 'quantile';
  %           any other value uses the fourths suggested by Tukey.
  %   bplot - 1 (default) draws a boxplot; any other value omits it.
  %
  % Outputs:
  %   Q1          - 25% quantile (lower fourth) of each column of 'x'.
  %   Q3          - 75% quantile (upper fourth) of each column of 'x'.
  %   outlier_num - two-column matrix [row column] locating the values of
  %                 'x' considered potential outliers (values below the
  %                 lower fence first, then values above the upper fence).
  %                 When nothing is flagged, the character string
  %                 'No outliers have been identified!' is returned instead.
  %
  % Created by Francisco Augusto Alcaraz Garcia
  % alcaraz_garcia@yahoo.com
  %
  % References:
  %
  % 1) B. Iglewicz; D.C. Hoaglin (1993). How to Detect and Handle Outliers.
  %    ASQC Basic References in Quality Control, vol. 16, Wisconsin, US.
  %
  % 2) J.W. Tukey (1977). Exploratory Data Analysis. Addison Wesley.

  % Check number of input arguments
  if (nargin < 1) || (nargin > 4)
    error('Requires one to four input arguments.')
  end

  % Define default values for the arguments that were not supplied
  if nargin < 2
    k = 2;
  end
  if nargin < 3
    tukey = 0;
  end
  if nargin < 4
    bplot = 1;
  end

  % Check for validity of inputs
  if ~isnumeric(x)
    error('Input x must be a numeric array')
  end

  n = size(x, 1);

  if tukey == 0
    Q1 = quantile(x, 0.25);
    Q3 = quantile(x, 0.75);
  else
    % Tukey's fourths: the depth of the fourth is
    %   f = (floor(depth of median) + 1)/2, depth of median = (n+1)/2,
    % so f is either a whole number or ends in .5.
    xsort = sort(x);
    f = (floor((n + 1)/2) + 1)/2;
    lo = floor(f);
    if f == lo
      % Whole depth: the fourths are single order statistics, counted
      % f positions in from each end. (The value is compared against its
      % floor because 'isinteger' only inspects the storage class and is
      % always false for a double.)
      Q1 = xsort(f, :);
      Q3 = xsort(n + 1 - f, :);
    else
      % Half depth: average the two order statistics that straddle it.
      % For the upper fourth these are the positions lo and lo+1 counted
      % from the top, i.e. rows n+1-lo and n-lo.
      Q1 = (xsort(lo, :) + xsort(lo + 1, :))/2;
      Q3 = (xsort(n - lo, :) + xsort(n + 1 - lo, :))/2;
    end
  end

  % Fences at k interquartile ranges beyond the quartiles, one per column
  spread = Q3 - Q1;
  [i1, j1] = find(x < repmat(Q1 - k*spread, n, 1));
  [i2, j2] = find(x > repmat(Q3 + k*spread, n, 1));

  % Low-side hits first, then high-side hits, as [row column] pairs
  rows = [i1(:); i2(:)];
  cols = [j1(:); j2(:)];
  if isempty(rows)
    outlier_num = ('No outliers have been identified!');
  else
    outlier_num = [rows cols];
  end

  if bplot == 1
    % NOTE(review): 'boxplot' computes its own quartiles, so with the Tukey
    % option the plotted whiskers may differ slightly from Q1/Q3 above.
    boxplot(x, 'notch', 'on', 'whisker', k, 'symbol', 'r.')
  end
  end